R 엘라스틱넷 회귀분석

1 개요[ | ]

R elastic net regression
R 엘라스틱넷 회귀분석

2 전체 데이터 사용[ | ]

2.1 lambda 생략[ | ]

library(glmnet)  # glmnet()
library(Metrics) # mse()

# Predictors: every mtcars column except mpg; response: mpg.
x <- as.matrix(mtcars[, -1])
y <- mtcars[, 1]

# With lambda omitted, glmnet fits a whole path of models.
model <- glmnet(x, y, alpha = .5)
n_models <- length(model$lambda)
n_models             # 83 lambda values supplied -> 83 fitted models
coef(model)[, 1]     # coefficients of model 1 (largest lambda)
coef(model)[, 83]    # coefficients of model 83 (smallest lambda)
pred <- predict(model, newx = x)
mse(y, pred[, 1])    # training MSE of model 1
mse(y, pred[, 83])   # training MSE of model 83

2.2 lambda 지정[ | ]

# Fit elastic-net models at two explicitly chosen lambda values and
# compare their training-set MSE.
options(echo = TRUE)  # TRUE, not T: T is an ordinary variable and can be reassigned
library(glmnet)  # glmnet()
library(Metrics) # mse()

x <- as.matrix(mtcars[, -1])  # predictors (all columns except mpg)
y <- mtcars[, 1]              # response: mpg

# lambda = 1
model1 <- glmnet(x, y, alpha = .5, lambda = 1)
coef(model1)
pred1 <- predict(model1, x)
mse(y, pred1) ## training MSE
# lambda = 2
model2 <- glmnet(x, y, alpha = .5, lambda = 2)
coef(model2)
pred2 <- predict(model2, x)
mse(y, pred2) ## training MSE

2.3 cv.glmnet()으로 lambda 찾기[ | ]

# Choose lambda by cross-validation instead of scanning a grid by hand.
set.seed(12345)          # CV folds are random; fix the seed for reproducibility
library(glmnet) # cv.glmnet()

x <- as.matrix(mtcars[, -1])
y <- mtcars[, 1]

cv <- cv.glmnet(x, y, alpha = .5)
cv$lambda.min # lambda minimizing CV MSE = 0.834975
cv$lambda.1se # largest lambda within 1 SE of that minimum = 2.323368
plot(cv)

3 데이터 분할[ | ]

3.1 lambda 찾기 (alpha=.5)[ | ]

# Split mtcars into train/test sets, then scan a lambda grid at alpha = .5
# and record each model's training-set MSE.
set.seed(12345)
library(caret)   # createDataPartition()
library(glmnet)  # glmnet()
library(Metrics) # mse()

df <- mtcars

# Train/test split (7:3)
idx <- createDataPartition(df$mpg, list = FALSE, p = 0.7)
Train <- df[ idx, ]
Test  <- df[-idx, ]
Train.x <- as.matrix(Train[, -1])
Test.x  <- as.matrix(Test[, -1]) # BUG FIX: was built from Train, not Test
Train.y <- Train[, 1]
Test.y  <- Test[, 1]

lambda <- 10^seq(5, -20, by = -.05)
model <- glmnet(Train.x, Train.y, alpha = .5, lambda = lambda)
pred <- predict(model, newx = Train.x)
# One MSE per lambda. vapply preallocates the result and keeps the name
# distinct from Metrics::mse(), which the original code shadowed.
mse_values <- vapply(seq_along(lambda),
                     function(i) mse(Train.y, pred[, i]),
                     numeric(1))
length(lambda)               # number of models (lambda values) examined
idx <- which.min(mse_values) # index of the minimum-training-MSE model
idx
mse_values[idx]              # minimum training MSE
lambda[idx]                  # lambda achieving that minimum
# Visualize training MSE across log(lambda), highlighting the minimum
color <- rep("#00000011", length(lambda))
color[idx] <- "red"
plot(log(lambda), mse_values, pch = 16, col = color)

3.2 lambda, alpha 찾기[ | ]

# Grid-search both alpha and lambda on the training set, tracking each
# fitted model and its training-set MSE.
set.seed(12345)
library(caret)   # createDataPartition()
library(glmnet)  # glmnet()
library(Metrics) # mse()

df <- mtcars

# Train/test split (7:3)
idx <- createDataPartition(df$mpg, list = FALSE, p = 0.7)
Train <- df[ idx, ]
Test  <- df[-idx, ]
Train.x <- as.matrix(Train[, -1])
Test.x  <- as.matrix(Test[, -1]) # BUG FIX: was built from Train, not Test
Train.y <- Train[, 1]
Test.y  <- Test[, 1]

alphas  <- seq(0, 1, by = .1)       # 11 alpha values
lambdas <- 10^seq(5, -5, by = -.05) # 201 lambda values

# Preallocate one slot/row per (alpha, lambda) pair.
# BUG FIX: append(models, model) flattened each glmnet object into its
# components; a list assigned by [[k]] keeps each model intact.
# Also removes the spurious all-NA first row the original data.frame had.
n_fits  <- length(alphas) * length(lambdas)
models  <- vector("list", n_fits)
results <- data.frame(alpha  = numeric(n_fits),
                      lambda = numeric(n_fits),
                      mse    = numeric(n_fits))

k <- 0
for (alpha in alphas) {
  for (lambda in lambdas) {
    k <- k + 1
    model <- glmnet(Train.x, Train.y, alpha = alpha, lambda = lambda)
    pred  <- predict(model, newx = Train.x)
    models[[k]]  <- model
    results[k, ] <- c(alpha, lambda, mse(Train.y, pred))
  }
}
idx <- which.min(results$mse)
results[idx, ] # minimum training MSE
# Visualize training MSE over (log(lambda), alpha), highlighting the minimum
library(scatterplot3d)
color <- rep("#00000011", nrow(results))
color[idx] <- "red"
scatterplot3d(x = log(results$lambda),
              y = results$alpha,
              z = results$mse,
              xlab = "log(lambda)",
              ylab = "alpha",
              zlab = "MSE",
              angle = 120,
              pch = 16,
              color = color)

4 같이 보기[ | ]

문서 댓글 ({{ doc_comments.length }})
{{ comment.name }} {{ comment.created | snstime }}