Ridge, Lasso & Elastic Net Regression with R | Boston Housing Data Example

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

Load the libraries

# Install any listed packages that are missing, then attach everything.
# https://stackoverflow.com/questions/4090169/elegant-way-to-check-for-missing-packages-and-install-them
list.of.packages <- c("caret", "glmnet", "mlbench", "psych")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[, "Package"])]
if (length(new.packages) > 0) install.packages(new.packages, dependencies = TRUE)
# Libraries Needed
library(caret)
library(glmnet)
library(mlbench)
library(psych)
# Data: Boston Housing (response is `medv`, median home value)
data("BostonHousing")
data <- BostonHousing  # NOTE(review): `data` shadows utils::data(); kept because later chunks use it

Data Partition

# Randomly assign each row to the training (~70%) or test (~30%) set
set.seed(222)
ind <- sample(2, nrow(data), replace = TRUE, prob = c(0.7, 0.3))
train <- data[ind == 1, ]
test <- data[ind == 2, ]

Custom Control Parameters

# Resampling scheme shared by every model: 10-fold CV repeated 5 times
custom <- trainControl(method = "repeatedcv",
                       number = 10,
                       repeats = 5,
                       verboseIter = TRUE)

Linear Model

# Baseline: ordinary least squares, cross-validated via caret
# NOTE(review): `lm` masks stats::lm() in this session; name kept because
# the model-comparison chunk below refers to it
set.seed(186)
lm <- train(medv ~ .,        # `.` = use all other columns as predictors
            data = train,
            method = "lm",
            trControl = custom)

Results

# Inspect the cross-validated baseline linear model
summary(lm)          # coefficient table and fit statistics of the final OLS model
lm$results           # resampled performance metrics (RMSE, Rsquared, MAE)
plot(lm$finalModel)  # base-R regression diagnostic plots

Ridge Regression

# Ridge regression: alpha = 0 fixes the pure L2 penalty; tune lambda only.
# `length.out` spelled in full (bare `length=` relies on partial matching).
set.seed(186)
ridge <- train(medv ~ .,     # `.` = use all other columns as predictors
               data = train,
               method = "glmnet",
               tuneGrid = expand.grid(alpha = 0,
                                      lambda = seq(0.0001, 1, length.out = 5)),
               trControl = custom)

Plot Results

# Visualize the ridge fit
plot(ridge)                                            # CV RMSE vs lambda
plot(ridge$finalModel, xvar = "lambda", label = TRUE)  # coefficient paths vs log-lambda
plot(ridge$finalModel, xvar = "dev", label = TRUE)     # paths vs fraction of deviance explained
plot(varImp(ridge, scale = TRUE))                      # scaled variable importance

Lasso Regression

When you have a multicollinearity problem, LASSO tends to select one variable from a group of correlated predictors and shrink the coefficients of the others toward zero.
# Lasso regression: alpha = 1 fixes the pure L1 penalty; tune lambda only.
# `length.out` spelled in full (bare `length=` relies on partial matching).
set.seed(186)
lasso <- train(medv ~ .,     # `.` = use all other columns as predictors
               data = train,
               method = "glmnet",
               tuneGrid = expand.grid(alpha = 1,
                                      lambda = seq(0.0001, 0.2, length.out = 5)),
               trControl = custom)

Plot Results

# Visualize the lasso fit
plot(lasso)                                            # CV RMSE vs lambda
plot(lasso$finalModel, xvar = "lambda", label = TRUE)  # coefficient paths vs log-lambda
plot(lasso$finalModel, xvar = "dev", label = TRUE)     # paths vs fraction of deviance explained
plot(varImp(lasso, scale = TRUE))                      # scaled variable importance

Elastic Net Regression

# Elastic net: tune both the L1/L2 mixing parameter alpha and lambda.
# `length.out` spelled in full (bare `length=` relies on partial matching).
set.seed(1234)
en <- train(medv ~ .,        # `.` = use all other columns as predictors
            data = train,
            method = "glmnet",
            tuneGrid = expand.grid(alpha = seq(0.0001, 1, length.out = 5),
                                   lambda = seq(0.0001, 0.2, length.out = 5)),
            trControl = custom)

Plot Results

# Visualize the elastic net fit
plot(en)                                            # CV RMSE over the (alpha, lambda) grid
plot(en$finalModel, xvar = "lambda", label = TRUE)  # coefficient paths vs log-lambda
plot(en$finalModel, xvar = "dev", label = TRUE)     # paths vs fraction of deviance explained
plot(varImp(en, scale = TRUE))                      # scaled importance (scale = TRUE is caret's default)

Compare Models

# Pool the resampling results of all four models and compare them
candidates <- list(LinearModel = lm,
                   Ridge = ridge,
                   Lasso = lasso,
                   ElasticNet = en)
res <- resamples(candidates)
summary(res)                   # metric summaries per model
bwplot(res)                    # box-and-whisker comparison
xyplot(res, metric = "RMSE")   # pairwise RMSE scatter

Best Model

# Inspect the chosen elastic net: tuned hyper-parameters and coefficients
en$bestTune                         # winning (alpha, lambda) pair from the grid search
best <- en$finalModel
coef(best, s = en$bestTune$lambda)  # coefficients at the selected lambda

Save Final Model for Later Use

# Persist the tuned elastic net to disk and confirm it round-trips
model_path <- "final_model.rds"
saveRDS(en, model_path)
fm <- readRDS(model_path)
print(fm)

Prediction

# Root-mean-squared error between observed and predicted values
rmse <- function(actual, predicted) {
  sqrt(mean((actual - predicted)^2))
}

# In-sample RMSE (training data)
p1 <- predict(fm, train)
rmse(train$medv, p1)

# Held-out RMSE (test data) — the honest performance estimate
p2 <- predict(fm, test)
rmse(test$medv, p2)

Reference

Ridge, Lasso & Elastic Net Regression with R | Boston Housing Data Example, Steps & Interpretation
https://www.youtube.com/watch?v=_3xMSbIde2I

Comments

Popular posts from this blog

gspread error:gspread.exceptions.SpreadsheetNotFound

Miniconda installation problem: concurrent.futures.process.BrokenProcessPool: A process in the process pool was terminated abruptly while the future was running or pending.

转载:彻底搞清楚promoter, exon, intron, and UTR