data <- matrix(c(0.0, 0, 2, 0, 0, 1, 0, 1, 0,
2.1, 1, 0, 2, 3, 2, 0, 0, 3,
2.7, 0, 0, 0, 2, 2, 1, 1, 1,
5.9, 3, 0, 1, 0, 0, 0, 2, 0,
7.3, 3, 4, 0, 1, 1, 1, 0, 0,
0.0, 0, 2, 0, 0, 3, 0, 0, 0,
2.0, 1, 0, 2, 1, 0, 0, 0, 1),
byrow = TRUE, nrow=7, ncol=9)
colnames(data) <- c("outcome", "x1", "x2", "x3", "x4", "x5",
"x6", "x7", "x8")
simple_model <- lm(outcome ~ x1, data = as.data.frame(data))
summary(simple_model)
##
## Call:
## lm(formula = outcome ~ x1, data = as.data.frame(data))
##
## Residuals:
## 1 2 3 4 5 6 7
## -0.6632 -0.4829 2.0368 -0.5224 0.8776 -0.6632 -0.5829
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.6632 0.5914 1.121 0.31306
## x1 1.9197 0.3499 5.487 0.00274 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.153 on 5 degrees of freedom
## Multiple R-squared: 0.8576, Adjusted R-squared: 0.8291
## F-statistic: 30.11 on 1 and 5 DF, p-value: 0.002743
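As a cross-check, the Multiple R-squared reported above can be reproduced by hand from the residuals; a minimal sketch:
y <- data[, 1]
1 - sum(residuals(simple_model)^2) / sum((y - mean(y))^2)  # reproduces the 0.8576 above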
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 4.1-4
coef(glmnet(y=data[, 1], x=data[, 2:9], lambda=1))
## 9 x 1 sparse Matrix of class "dgCMatrix"
## s0
## (Intercept) 1.5311844
## x1 1.1012711
## x2 .
## x3 .
## x4 .
## x5 .
## x6 0.2357701
## x7 .
## x8 .
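Here lambda = 1 is simply fixed by hand. A hedged sketch of the more usual route, choosing lambda by cross-validation with cv.glmnet; with only seven observations the folds are tiny (glmnet will warn), so treat this purely as an illustration:
set.seed(1)                                            # folds are assigned at random
cvfit <- cv.glmnet(y = data[, 1], x = data[, 2:9], nfolds = 3)
coef(cvfit, s = "lambda.min")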
complex_model <- lm(outcome ~ x1 + x6, data = as.data.frame(data))
summary(complex_model)
##
## Call:
## lm(formula = outcome ~ x1 + x6, data = as.data.frame(data))
##
## Residuals:
## 1 2 3 4 5 6 7
## -2.190e-01 1.000e-01 3.714e-01 3.381e-01 -3.714e-01 -2.190e-01 1.747e-18
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.2190 0.1914 1.145 0.31621
## x1 1.7810 0.1087 16.385 8.12e-05 ***
## x6 2.1095 0.2996 7.040 0.00215 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3522 on 4 degrees of freedom
## Multiple R-squared: 0.9894, Adjusted R-squared: 0.984
## F-statistic: 186.1 on 2 and 4 DF, p-value: 0.0001131
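Since simple_model is nested in complex_model, the two fits can also be compared formally; a small sketch using the partial F-test:
anova(simple_model, complex_model)  # F-test for adding x6 to the model with x1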
max(abs(data[, 7] - data[, 5] + data[, 9])) # columns 7, 5, 9 are x6, x4, x8: confirms x6 = x4 - x8 exactly
## [1] 0
See the literature section.
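A hedged alternative check of the same dependence, written with column names and the matrix rank:
all.equal(data[, "x6"], data[, "x4"] - data[, "x8"])  # TRUE: x6 is an exact linear combination
qr(data[, c("x4", "x6", "x8")])$rank                  # 2, not 3: the three columns are linearly dependent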
Rsquared <- function(estimator, data)
{
  n <- dim(data)[1]                       # number of observations
  p <- length(estimator[-1])              # number of candidate predictors (intercept excluded)
  s <- nnzero(estimator[-1])              # number of nonzero slopes (Matrix::nnzero, loaded with glmnet)
  X <- cbind(rep(1, n), data[, 2:(p+1)])  # design matrix: intercept column plus the x's
  Rsquared <- 1 - sum((data[, 1] - X %*% estimator)^2)/(var(data[, 1])*(n-1))
  Rsquared_adjusted <- 1 - (1-Rsquared)*(n-1)/(n-s-1)  # adjust only for the s selected predictors
  print(Rsquared)
  print(Rsquared_adjusted)
}
estimatorls <- simple_model$coefficients
estimatorlasso <- rep(0, dim(data)[2]) # do not forget the intercept
estimatorlasso[c(1, 2, 7)] <- complex_model$coefficients
Rsquared(estimatorls, data)
## [1] 0.8575922
## [1] 0.8291106
Rsquared(estimatorlasso, data)
## [1] 0.9893652
## [1] 0.9840478
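As a cross-check (not part of the original output), these values coincide with what summary() reports for the corresponding least-squares fits, since the post-lasso estimator has exactly two nonzero slopes:
c(summary(simple_model)$r.squared, summary(simple_model)$adj.r.squared)
c(summary(complex_model)$r.squared, summary(complex_model)$adj.r.squared)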
# the knitr chunk option comment=NA removes the '##' prefix from printed output: copy-pasting becomes easier
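A minimal sketch, assuming the document is compiled with knitr: the option can be set once for all chunks instead of per chunk.
knitr::opts_chunk$set(comment = NA)  # drop the '##' prefix from all printed output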
library(stargazer)
stargazer(data[, 1:2])
% Table created by stargazer v.5.2.3 by Marek Hlavac, Social Policy Institute. E-mail: marek.hlavac at gmail.com
% Date and time: Sun, Oct 12, 2025 - 14:47:56
\begin{table}[!htbp] \centering
\caption{}
\label{}
\begin{tabular}{@{\extracolsep{5pt}} cc}
\\[-1.8ex]\hline
\hline \\[-1.8ex]
outcome & x1 \\
\hline \\[-1.8ex]
$0$ & $0$ \\
$2.100$ & $1$ \\
$2.700$ & $0$ \\
$5.900$ & $3$ \\
$7.300$ & $3$ \\
$0$ & $0$ \\
$2$ & $1$ \\
\hline \\[-1.8ex]
\end{tabular}
\end{table}
stargazer(data)
% Table created by stargazer v.5.2.3 by Marek Hlavac, Social Policy Institute. E-mail: marek.hlavac at gmail.com
% Date and time: Sun, Oct 12, 2025 - 14:47:56
\begin{table}[!htbp] \centering
\caption{}
\label{}
\begin{tabular}{@{\extracolsep{5pt}} ccccccccc}
\\[-1.8ex]\hline
\hline \\[-1.8ex]
outcome & x1 & x2 & x3 & x4 & x5 & x6 & x7 & x8 \\
\hline \\[-1.8ex]
$0$ & $0$ & $2$ & $0$ & $0$ & $1$ & $0$ & $1$ & $0$ \\
$2.100$ & $1$ & $0$ & $2$ & $3$ & $2$ & $0$ & $0$ & $3$ \\
$2.700$ & $0$ & $0$ & $0$ & $2$ & $2$ & $1$ & $1$ & $1$ \\
$5.900$ & $3$ & $0$ & $1$ & $0$ & $0$ & $0$ & $2$ & $0$ \\
$7.300$ & $3$ & $4$ & $0$ & $1$ & $1$ & $1$ & $0$ & $0$ \\
$0$ & $0$ & $2$ & $0$ & $0$ & $3$ & $0$ & $0$ & $0$ \\
$2$ & $1$ & $0$ & $2$ & $1$ & $0$ & $0$ & $0$ & $1$ \\
\hline \\[-1.8ex]
\end{tabular}
\end{table}
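A hedged follow-up sketch: stargazer also formats fitted lm objects directly as regression tables, which complements the raw-data tables above; the title and label below are illustrative and not part of the original.
stargazer(simple_model, complex_model,
          dep.var.labels = "outcome",
          title = "Least squares vs. post-lasso fit")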