"R regsubsets()"의 두 판 사이의 차이

13번째 줄: 13번째 줄:
summary(subsets)
summary(subsets)
</syntaxhighlight>
</syntaxhighlight>
<syntaxhighlight lang='r' run>
# Turn on the echo option. TRUE is spelled out because T is an ordinary
# variable that can be reassigned, whereas TRUE is a reserved word.
options(echo = TRUE)
library(leaps)
df <- swiss
# Examine every combination of predictors
subsets <- regsubsets(
  Fertility ~ .,
  data = df,
  nbest = 32,
  method = "exhaustive"
)
summ <- summary(subsets)
# One row per candidate model returned by the search
results <- data.frame(
  p     = rowSums(summ$which) - 1,  # selected columns minus one (presumably the intercept)
  rss   = summ$rss,
  rsq   = summ$rsq,
  adjr2 = summ$adjr2,
  bic   = summ$bic
)
# Pick the variable combination that minimizes BIC
(idx <- which.min(results$bic))
results[idx, ]
##    p      rss      rsq    adjr2      bic
## 26 4 2158.069 0.6993476 0.670714 -37.23388
coef(subsets, id = idx)
## (Intercept)      Agriculture        Education        Catholic Infant.Mortality
##  62.1013116      -0.1546175      -0.9802638        0.1246664        1.0784422
</syntaxhighlight>
<syntaxhighlight lang='r' run>
<syntaxhighlight lang='r' run>
options(echo=T)
options(echo=T)

2020년 11월 2일 (월) 02:14 판

1 개요

R regsubsets()
  • "Functions For Model Selection → 모델 선택용 함수"
# Call regsubsets() through its formula interface: Fertility is modelled
# on all remaining columns of the built-in swiss data set.
library(leaps)
subsets <- regsubsets(Fertility ~ ., data = swiss)
# Show the summary of the fitted subset-selection object.
summary(subsets)
# Call regsubsets() through its matrix interface: every column of swiss
# except the first is passed as a matrix, and the first column is passed
# separately as the second argument.
library(leaps)
subsets <- regsubsets(as.matrix(swiss[, -1]), swiss[, 1])
# Show the summary of the fitted subset-selection object.
summary(subsets)
# Turn on the echo option. TRUE is spelled out because T is an ordinary
# variable that can be reassigned, whereas TRUE is a reserved word.
options(echo = TRUE)
library(leaps)

df <- swiss
# Examine every combination of predictors
subsets <- regsubsets(
  Fertility ~ .,
  data = df,
  nbest = 32,
  method = "exhaustive"
)
summ <- summary(subsets)
# One row per candidate model returned by the search
results <- data.frame(
  p     = rowSums(summ$which) - 1,  # selected columns minus one (presumably the intercept)
  rss   = summ$rss,
  rsq   = summ$rsq,
  adjr2 = summ$adjr2,
  bic   = summ$bic
)

# Pick the variable combination that minimizes BIC
(idx <- which.min(results$bic))
results[idx, ]
##    p      rss       rsq    adjr2       bic
## 26 4 2158.069 0.6993476 0.670714 -37.23388
coef(subsets, id = idx)
## (Intercept)      Agriculture        Education         Catholic Infant.Mortality 
##  62.1013116       -0.1546175       -0.9802638        0.1246664        1.0784422
# Turn on the echo option. TRUE is spelled out because T is an ordinary
# variable that can be reassigned, whereas TRUE is a reserved word.
options(echo = TRUE)
library(leaps)

df <- swiss
# Examine every combination of predictors
subsets <- regsubsets(
  Fertility ~ .,
  data = df,
  nbest = 32,
  method = "exhaustive"
)
summ <- summary(subsets)
# One row per candidate model returned by the search
results <- data.frame(
  p     = rowSums(summ$which) - 1,  # selected columns minus one (presumably the intercept)
  rss   = summ$rss,
  rsq   = summ$rsq,
  adjr2 = summ$adjr2,
  bic   = summ$bic
)

# Pick the variable combination that minimizes BIC
(idx <- which.min(results$bic))
results[idx, ]
##    p      rss       rsq    adjr2       bic
## 26 4 2158.069 0.6993476 0.670714 -37.23388

# Logical row of summ$which for the chosen model: TRUE marks an included term.
selected <- summ$which[idx, ]
# Keep the included term names, removing the intercept by name instead of
# relying on it being the first column.
(vars <- setdiff(names(selected)[selected], "(Intercept)"))
# Build "Fertility ~ <vars joined by +>" without pasting and re-parsing text.
formula <- reformulate(vars, response = "Fertility")
# Refit the chosen model with lm() and print its summary.
model <- lm(formula, data = df)
summary(model)

2 같이 보기

3 참고

문서 댓글 ({{ doc_comments.length }})
{{ comment.name }} {{ comment.created | snstime }}