library(tm) # TermDocumentMatrix()
#' Remove terms (rows) from a term-document matrix.
#'
#' @param tdm A matrix-like object whose row names are the terms, e.g. a
#'   `TermDocumentMatrix` built with the tm package.
#' @param terms Character vector of terms to remove. Entries that do not
#'   occur in `rownames(tdm)` are ignored.
#' @return `tdm` without the rows whose names appear in `terms`. The result
#'   keeps both dimensions even when a single term remains. If `tdm` has no
#'   row names there is nothing to match against, so it is returned as is.
zTdmRemoveTerms <- function(tdm, terms) {
  term_names <- rownames(tdm)
  if (is.null(term_names)) {
    # Without row names the mask would be logical(0) and silently drop
    # every row; no term can match, so keep the input unchanged.
    return(tdm)
  }
  keep <- !(term_names %in% terms)
  # drop = FALSE prevents a plain matrix from collapsing to a vector when
  # only one term is left.
  tdm[keep, , drop = FALSE]
}
# Sample documents (Korean text) used to demonstrate zTdmRemoveTerms().
docs1 <- c("안녕, 안녕.", "반갑다, 친구들.", "반갑다, 친구야, 친구야")
# Normalise the hard-coded literals to UTF-8. enc2utf8() converts from each
# string's declared (or native) encoding, so it is also correct when the
# session is already UTF-8. The previous iconv(docs1, "CP949", "UTF-8")
# assumed a CP949 (Korean Windows) session and mis-decodes UTF-8 input.
docs1 <- enc2utf8(docs1)
# Build the term-document matrix: strip punctuation, keep stop words.
tdm1 <- TermDocumentMatrix(
  Corpus(VectorSource(docs1)),
  control = list(removePunctuation = TRUE, stopwords = FALSE)
)
inspect(tdm1)
## <<TermDocumentMatrix (terms: 4, documents: 3)>>
## Non-/sparse entries: 5/7
## Sparsity : 58%
## Maximal term length: 3
## Weighting : term frequency (tf)
## Sample :
## Docs
## Terms 1 2 3
## 반갑다 0 1 1
## 안녕 2 0 0
## 친구들 0 1 0
## 친구야 0 0 2
# Remove a single term.
inspect(zTdmRemoveTerms(tdm1, "반갑다"))
## <<TermDocumentMatrix (terms: 3, documents: 3)>>
## Non-/sparse entries: 3/6
## Sparsity : 67%
## Maximal term length: 3
## Weighting : term frequency (tf)
## Sample :
## Docs
## Terms 1 2 3
## 안녕 2 0 0
## 친구들 0 1 0
## 친구야 0 0 2
# Remove several terms; a term absent from the matrix is ignored.
inspect(zTdmRemoveTerms(tdm1, c("반갑다","친구야","아무말")))
## <<TermDocumentMatrix (terms: 2, documents: 3)>>
## Non-/sparse entries: 2/4
## Sparsity : 67%
## Maximal term length: 3
## Weighting : term frequency (tf)
## Sample :
## Docs
## Terms 1 2 3
## 안녕 2 0 0
## 친구들 0 1 0