# Example of Multiple Linear Regression library(scatterplot3d) N=10 # N = 100 # %*% is dot product X = 1:N # akin seq(1,N,by=1) M = 10*N Y = sample(1:M,size=N) # taking an SRS from 1:100 #XY = cbind(X,Y) #XY = expand.grid(X=X,Y=Y) # like mesh in matlab a = c(1, 2, 3) Z1 = a[1] + a[2]*X + a[3]*Y # here we have zero noise \sigma = 0 L1 = lm(Z1 ~ X + Y) sigma = 3 # variability around regression plane e = rnorm(N,mean=0,sd=sigma) Z2 = a[1] + a[2]*X + a[3]*Y + e L2 = lm(Z2 ~ X + Y) sigma = 6 # more variability around regression plane e = rnorm(N,mean=0,sd=sigma) Z3 = a[1] + a[2]*X + a[3]*Y + e L3 = lm(Z3 ~ X + Y) S1 = summary(L1) S2 = summary(L2) S3 = summary(L3) # residuals(L1) # it yields the residuals # Plots # pdf("FakeData3DNoiseless.pdf") par(mfrow=c(1,2)) scatterplot3d(X,Y,Z1, main="3D Scatterplot (noiseless)",box=T) plt1 = scatterplot3d(X,Y,Z1, main="3D Scatterplot",box=T, zlim=c(0,max(Z1)),angle=40) plt1$plane3d(L1) #dev.off() # this pairs with pdf() x11() #pdf("FakeData3DNoisySigmaIs3.pdf") par(mfrow=c(1,2)) scatterplot3d(X,Y,Z2,main="3D Scatterplot (noisy, sigma=3)",box=T) plt2 = scatterplot3d(X,Y,Z2,main="3D Scatterplot (noisy, sigma=3)",box=T, zlim=c(0,max(Z2))) # angle= plt2$plane3d(L2) #dev.off() # this pairs with pdf() x11() #pdf("FakeData3DNoisySigmaIs6.pdf") par(mfrow=c(1,2)) scatterplot3d(X,Y,Z3,main="3D Scatterplot (noisy, sigma=6)",box=T) plt3 = scatterplot3d(X,Y,Z3,main="3D Scatterplot (noisy, sigma=6)",box=T, zlim=c(0,max(Z3))) # angle= plt3$plane3d(L3) #dev.off() # this pairs with pdf() X11() par(mfrow=c(1,2)) plot(X,Z2) plot(Y,Z2) # Comments on the summary function. # First, summary returns the method that was used with lm, next is a # five-number summary of the residuals. As before, the residuals are # available with the residuals command. More importantly, the regression # coefficients are presented in a table which includes their estimates # (under Estimate), their standard error (under Std. Error), the t-value # for a hypothesis test that bi = 0 under t value and the corresponding # p-value for a two-sided test. Small p-values are flagged as shown with # the 3 asterisks,***, in the y row. Other tests of hypotheses are easily # done knowing the first two columns and the degrees of freedom. The standard # error for the residuals is given along with its degrees of freedom. This # allows one to investigate the residuals which are again available with the # residuals method. The multiple and adjusted R2 is given. R2 is interpreted # as the ``fraction of the variance explained by the model.'' Finally the # F statistic is given. The p-value for this is from a hypotheses test # that b1 = 0 = b2 = ยทยทยท = bp. That is, the regression is not appropriate. # The theory for this comes from that of the analysis of variance (ANOVA).