52번째 줄: | 52번째 줄: | ||
# 모델 적합 | # 모델 적합 | ||
library(party, warn.conflicts=F, quietly=T) | library(party, warn.conflicts=F, quietly=T, mask.ok) | ||
model = cforest(ploidy ~ ., trainData) | model = cforest(ploidy ~ ., trainData) | ||
2020년 5월 10일 (일) 13:12 판
1 개요
- R Random Forest
- R 랜덤 포레스트
- randomForest() 또는 cforest() 사용
2 randomForest()
R
CPU
3.2s
MEM
216M
3.6s
Copy
# Example: classify `ploidy` in the stagec data set with randomForest().
# Steps: load data, drop incomplete rows, split into train/test, fit,
# inspect the model, then score it on the held-out rows.
set.seed(42)  # fix the random seed
data(stagec, package='rpart')
df = stagec
df = na.omit(df)  # remove rows with missing values

# Split the data: createDataPartition() is called with p = 0.7 and its
# result indexes the training rows; the remaining rows form the test set.
library(caret, quietly = TRUE)
idx = createDataPartition(df$ploidy, p = 0.7, list = FALSE)
trainData = df[ idx, ]
testData  = df[-idx, ]

# Fit the model.
# The argument name is spelled out in full (`warn.conflicts`) rather than
# relying on partial matching, and TRUE/FALSE replace the reassignable T/F.
library(randomForest, warn.conflicts = FALSE)
model = randomForest(ploidy ~ ., trainData, ntree = 100, proximity = TRUE)

# Echo the statements that follow alongside their output.
options(echo = TRUE)

# Model summary
model
model$importance

# Visualisation
plot(model)
varImpPlot(model)

# Predict on the test set
pred = predict(model, testData)

# Confusion table: predicted class (rows) vs. actual class (columns)
table(pred, testData$ploidy)

# Accuracy: share of test rows whose prediction equals the actual class
sum(pred == testData$ploidy) / nrow(testData)
randomForest 4.6-14
Type rfNews() to see new features/changes/bug fixes.
> # 모델 정보
> model

Call:
 randomForest(formula = ploidy ~ ., data = trainData, ntree = 100, proximity = T)
               Type of random forest: classification
                     Number of trees: 100
No. of variables tried at each split: 2

        OOB estimate of  error rate: 7.37%
Confusion matrix:
           diploid tetraploid aneuploid class.error
diploid         44          1         1  0.04347826
tetraploid       1         44         0  0.02222222
aneuploid        3          1         0  1.00000000
> model$importance
        MeanDecreaseGini
pgtime          5.338281
pgstat          1.673620
age             4.631630
eet             1.001239
g2             32.856804
grade           1.410163
gleason         2.773629
>
> # 시각화
> plot(model)
> varImpPlot(model)
>
> # 테스트
> pred = predict(model, testData)
> # 분류표
> table(pred, testData$ploidy)

pred         diploid tetraploid aneuploid
  diploid         19          0         1
  tetraploid       0         19         0
  aneuploid        0          0         0
> # 정분류율
> sum(pred==testData$ploidy)/nrow(testData)
[1] 0.974359
3 cforest()
R
Copy
# Example: classify `ploidy` in the stagec data set with party::cforest().
# Steps: load data, drop incomplete rows, split into train/test, fit,
# print the model, then score it on the held-out rows.
set.seed(42)  # fix the random seed
data(stagec, package='rpart')
df = stagec
df = na.omit(df)  # remove rows with missing values

# Split the data: createDataPartition() is called with p = 0.7 and its
# result indexes the training rows; the remaining rows form the test set.
library(caret, quietly = TRUE)
idx = createDataPartition(df$ploidy, p = 0.7, list = FALSE)
trainData = df[ idx, ]
testData  = df[-idx, ]

# Fit the model.
# Only named, meaningful arguments are passed to library(); TRUE/FALSE
# replace the reassignable T/F shorthands.
library(party, warn.conflicts = FALSE, quietly = TRUE)
model = cforest(ploidy ~ ., trainData)

# Echo the statements that follow alongside their output.
options(echo = TRUE)

# Model summary
model

# Predict on the test set.
# NOTE(review): party documents `OOB` as applying only when `newdata` is
# NULL, so it may have no effect here -- confirm before relying on it.
pred = predict(model, newdata = testData, OOB = TRUE, type = "response")

# Confusion table: predicted class (rows) vs. actual class (columns)
table(pred, testData$ploidy)

# Accuracy: share of test rows whose prediction equals the actual class
sum(pred == testData$ploidy) / nrow(testData)
Loading
4 같이 보기
편집자 Jmnote Jmnote bot
로그인하시면 댓글을 쓸 수 있습니다.