Let’s read in the Duncan data and reproduce Figure 3.18 first.
prestige.data <- read.table("./Duncan.txt")
pairs(prestige.data[, c("income", "education", "prestige")])
You need to fill out the following sections for answering the questions.
# construct the design matrix
X <- as.matrix(cbind(1, prestige.data[, c(2, 3)]))
X <- as.matrix(cbind(1, prestige.data[, c("income", "education")]))
# construct the Gramian matrix
system.time(gramian.mat <- t(X) %*% X)
## user system elapsed
## 0 0 0
system.time(gramian.mat <- crossprod(X))
## user system elapsed
## 0 0 0
gramian.mat
## 1 income education
## 1 45 1884 2365
## income 1884 105148 122197
## education 2365 122197 163265
# calculate the inverse of the Gramian matrix
gramian.mat.inv <- solve(gramian.mat)
# construct the Y matrix
Y <- as.matrix(prestige.data[, "prestige"])
# Now calculate the LS estimate
beta.estimate <- solve(t(X) %*% X) %*% t(X) %*% Y
beta.estimate <- solve(t(X) %*% X) %*% (t(X) %*% Y)
beta.estimate <- solve(t(X) %*% X, t(X) %*% Y)
beta.estimate
# calculate sum of squared errors (or residuals)
sse <-
# calculate mean squared errors
mse <-
# calculate the variance-covariance matrix for beta hat
sigma.matrix <-
# standard error for beta_1
sqrt(sigma.matrix[2, 2])
t(0.025, 42) critical value:
qt(1.0 - 0.025, 42) #take a look at the documentation of "qt"
# t statistic
beta.estimate[2] / sqrt(sigma.matrix[2, 2])
a.vector <- c(1, 42, 84)
mean.prestige.est <-
theta.se <-
confidence.interval <-
Use lm
to check
(lmod <- lm(prestige ~ income + education, prestige.data))
##
## Call:
## lm(formula = prestige ~ income + education, data = prestige.data)
##
## Coefficients:
## (Intercept) income education
## -6.0647 0.5987 0.5458
summary(lmod)
##
## Call:
## lm(formula = prestige ~ income + education, data = prestige.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.538 -6.417 0.655 6.605 34.641
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.06466 4.27194 -1.420 0.163
## income 0.59873 0.11967 5.003 1.05e-05 ***
## education 0.54583 0.09825 5.555 1.73e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.37 on 42 degrees of freedom
## Multiple R-squared: 0.8282, Adjusted R-squared: 0.82
## F-statistic: 101.2 on 2 and 42 DF, p-value: < 2.2e-16
predict(lmod, newdata = data.frame("1" = 1, "income" = 42, "education" = 84), interval = "confidence")
## fit lwr upr
## 1 64.93216 57.5322 72.33212
(lmod.red <- lm(prestige ~ 1, data = prestige.data))
mean(prestige.data[, "prestige"])
# calculate the F statistic
F.statistic <-
# use pf function to get the associated p-value
p.value <- pf(F.statistic, ndf, ddf, lower.tail=FALSE)