R 엘라스틱넷 회귀분석 (Boston)

1 개요[ | ]

R elastic net regression
R 엘라스틱넷 회귀분석

2 전체 데이터 사용[ | ]

2.1 lambda 생략[ | ]

# Elastic-net regression (alpha = 0.5) on the full Boston data,
# letting glmnet() pick its own lambda path.
library(MASS)    # Boston
library(glmnet)  # glmnet()
library(Metrics) # mse()
df <- Boston
x <- as.matrix(df[, setdiff(names(df), "medv")])
y <- df$medv

model <- glmnet(x, y, alpha = 0.5)
cnt <- length(model$lambda)
cnt                 # number of fitted models (lambda values)
coef(model)[, 1]    # coefficients of the first model
coef(model)[, cnt]  # coefficients of the last model
pred <- predict(model, newx = x)
mse(y, pred[, 1])   # MSE of the first model
mse(y, pred[, cnt]) # MSE of the last model

2.2 lambda 지정[ | ]

# Elastic-net regression (alpha = 0.5) on the full Boston data
# with two explicitly chosen lambda values.
library(MASS)    # Boston
library(glmnet)  # glmnet()
library(Metrics) # mse()
df <- Boston
x <- as.matrix(df[, setdiff(names(df), "medv")])
y <- df$medv

# Fit with lambda = 1
model1 <- glmnet(x, y, alpha = 0.5, lambda = 1)
coef(model1)
pred1 <- predict(model1, x)
mse(y, pred1) # MSE = 26.40758
# Fit with lambda = 2
model2 <- glmnet(x, y, alpha = 0.5, lambda = 2)
coef(model2)
pred2 <- predict(model2, x)
mse(y, pred2) # MSE = 29.20631

2.3 cv.glmnet()으로 lambda 찾기[ | ]

# Choose lambda by cross-validation with cv.glmnet() (alpha = 0.5).
set.seed(1)
library(MASS)    # Boston
library(glmnet)  # cv.glmnet()
library(Metrics) # mse()
df <- Boston
x <- as.matrix(df[, setdiff(names(df), "medv")])
y <- df$medv

cv <- cv.glmnet(x, y, alpha = 0.5)
cv$lambda.min # lambda minimizing CV MSE = 0.04237004
cv$lambda.1se # largest lambda within 1 SE of the minimum = 0.8317411
plot(cv)

3 데이터 분할[ | ]

3.1 lambda 찾기 (alpha=.5)[ | ]

# Split Boston into train/test sets and search a lambda grid for the
# elastic net (alpha = 0.5) with the lowest training MSE.
set.seed(1)
library(MASS)    # Boston
library(caret)   # createDataPartition()
library(glmnet)  # glmnet()
library(Metrics) # mse()

df <- Boston

# Train/test split (8:2)
idx <- createDataPartition(df$medv, list = FALSE, p = 0.8)
Train <- df[ idx, ]
Test  <- df[-idx, ]
Train.x <- as.matrix(Train[!(names(df) %in% c("medv"))])
Test.x  <- as.matrix( Test[!(names(df) %in% c("medv"))])
Train.y <- Train$medv
Test.y  <- Test$medv

lambda <- 10^seq(5, -10, by = -0.05)
model <- glmnet(Train.x, Train.y, alpha = 0.5, lambda = lambda)
pred <- predict(model, newx = Train.x)
# Training MSE per lambda, computed column-wise in one vectorized step.
# (Previously a vector named `mse` was grown with append() inside a loop,
# shadowing Metrics::mse() while the loop kept calling it.)
train_mse <- colMeans((pred - Train.y)^2)
best <- which.min(train_mse)
best             # model index with the lowest training MSE
train_mse[best]  # lowest training MSE
lambda[best]     # lambda with the lowest training MSE
# Visualize training MSE over log(lambda); best model highlighted in red.
color <- rep("#00000011", length(lambda))
color[best] <- "red"
plot(log(lambda), train_mse, pch = 16, col = color)

3.2 lambda, alpha 찾기[ | ]

# Split Boston into train/test sets and grid-search both alpha and lambda
# for the elastic net with the lowest training MSE.
set.seed(1)
library(MASS)    # Boston
library(caret)   # createDataPartition()
library(glmnet)  # glmnet()
library(Metrics) # mse()

df <- Boston

# Train/test split (8:2)
idx <- createDataPartition(df$medv, list = FALSE, p = 0.8)
Train <- df[ idx, ]
Test  <- df[-idx, ]
Train.x <- as.matrix(Train[!(names(df) %in% c("medv"))])
Test.x  <- as.matrix( Test[!(names(df) %in% c("medv"))])
Train.y <- Train$medv
Test.y  <- Test$medv

alphas  <- seq(0, 1, by = 0.05)        # 21 alpha values
lambdas <- 10^seq(5, -15, by = -0.05)  # many lambda values

# All (alpha, lambda) combinations; lambda varies fastest, matching the
# original nested-loop order (outer alpha, inner lambda).
grid <- expand.grid(lambda = lambdas, alpha = alphas)

# Keep each fit intact in a preallocated list (append() on a plain vector
# would flatten every glmnet object into its components), and preallocate
# the MSE vector instead of growing a data frame row by row from an NA seed.
models <- vector("list", nrow(grid))
mse_values <- numeric(nrow(grid))
for (i in seq_len(nrow(grid))) {
  fit <- glmnet(Train.x, Train.y, alpha = grid$alpha[i], lambda = grid$lambda[i])
  models[[i]] <- fit
  mse_values[i] <- mse(Train.y, predict(fit, newx = Train.x))
}
results <- data.frame(alpha = grid$alpha, lambda = grid$lambda, mse = mse_values)

idx <- which.min(results$mse)
results[idx, ] # row with the lowest training MSE
# Visualize training MSE over (log(lambda), alpha); best model in red.
library(scatterplot3d)
color <- rep("#00000011", nrow(results))
color[idx] <- "red"
scatterplot3d(x = log(results$lambda),
              y = results$alpha,
              z = results$mse,
              xlab = "log(lambda)",
              ylab = "alpha",
              zlab = "MSE",
              angle = 120,
              pch = 16,
              color = color)

4 같이 보기[ | ]

문서 댓글 ({{ doc_comments.length }})
{{ comment.name }} {{ comment.created | snstime }}