# Example of Multiple Linear Regression
library(scatterplot3d)

N=10
# N = 100
# %*% is dot product
X = 1:N # akin seq(1,N,by=1) 
M = 10*N
Y = sample(1:M,size=N) # taking an SRS from 1:100
#XY = cbind(X,Y)
#XY = expand.grid(X=X,Y=Y) # like mesh in matlab
a = c(1, 2, 3)
Z1 = a[1] + a[2]*X + a[3]*Y  # here we have zero noise \sigma = 0
L1 = lm(Z1 ~ X + Y)


sigma = 3 # variability around regression plane
e =  rnorm(N,mean=0,sd=sigma)
Z2 = a[1] + a[2]*X + a[3]*Y + e
L2 = lm(Z2 ~ X + Y)

sigma = 6 # more variability around regression plane
e =  rnorm(N,mean=0,sd=sigma)
Z3 = a[1] + a[2]*X + a[3]*Y + e
L3 = lm(Z3 ~ X + Y)

S1 = summary(L1)
S2 = summary(L2)
S3 = summary(L3)
# residuals(L1) # it yields the residuals

# Plots 
# pdf("FakeData3DNoiseless.pdf")
par(mfrow=c(1,2))
scatterplot3d(X,Y,Z1, main="3D Scatterplot (noiseless)",box=T)
plt1 = scatterplot3d(X,Y,Z1, main="3D Scatterplot",box=T,
 zlim=c(0,max(Z1)),angle=40)
plt1$plane3d(L1)
#dev.off() # this pairs with pdf() 

x11()
#pdf("FakeData3DNoisySigmaIs3.pdf")
par(mfrow=c(1,2))
scatterplot3d(X,Y,Z2,main="3D Scatterplot (noisy, sigma=3)",box=T)
plt2 = scatterplot3d(X,Y,Z2,main="3D Scatterplot (noisy, sigma=3)",box=T,
  zlim=c(0,max(Z2))) # angle=
plt2$plane3d(L2)
#dev.off() # this pairs with pdf()

x11()
#pdf("FakeData3DNoisySigmaIs6.pdf")
par(mfrow=c(1,2))
scatterplot3d(X,Y,Z3,main="3D Scatterplot (noisy, sigma=6)",box=T)
plt3 = scatterplot3d(X,Y,Z3,main="3D Scatterplot (noisy, sigma=6)",box=T,
 zlim=c(0,max(Z3))) # angle=
plt3$plane3d(L3)
#dev.off() # this pairs with pdf()

X11()
par(mfrow=c(1,2))
plot(X,Z2)
plot(Y,Z2)

# Comments on the summary function.
# First, summary returns the method that was used with lm, next is a 
# five-number summary of the residuals. As before, the residuals are 
# available with the residuals command. More importantly, the regression 
# coefficients are presented in a table which includes their estimates 
# (under Estimate), their standard error (under Std. Error), the t-value 
# for a hypothesis test that bi = 0 under t value and the corresponding 
# p-value for a two-sided test. Small p-values are flagged as shown with 
# the 3 asterisks,***, in the y row. Other tests of hypotheses are easily 
# done knowing the first two columns and the degrees of freedom. The standard 
# error for the residuals is given along with its degrees of freedom. This 
# allows one to investigate the residuals which are again available with the 
# residuals method. The multiple and adjusted R2 is given. R2 is interpreted 
# as the ``fraction of the variance explained by the model.'' Finally the 
# F statistic is given. The p-value for this is from a hypotheses test 
# that b1 = 0 = b2 = ··· = bp. That is, the regression is not appropriate. 
# The theory for this comes from that of the analysis of variance (ANOVA).