Get the features from a document-feature matrix, which are stored as the column names of the dfm object.
featnames(x)
| Argument | Description |
|---|---|
| x | the dfm whose features will be extracted |
Returns a character vector of the feature labels.
```r
dfmat <- dfm(data_corpus_inaugural)
#> Warning: 'dfm.corpus()' is deprecated. Use 'tokens()' first.

# first 50 features (in original text order)
head(featnames(dfmat), 50)
#> [1] "fellow-citizens" "of" "the" "senate"
#> [5] "and" "house" "representatives" ":"
#> [9] "among" "vicissitudes" "incident" "to"
#> [13] "life" "no" "event" "could"
#> [17] "have" "filled" "me" "with"
#> [21] "greater" "anxieties" "than" "that"
#> [25] "which" "notification" "was" "transmitted"
#> [29] "by" "your" "order" ","
#> [33] "received" "on" "14th" "day"
#> [37] "present" "month" "." "one"
#> [41] "hand" "i" "summoned" "my"
#> [45] "country" "whose" "voice" "can"
#> [49] "never" "hear"

# first 50 features alphabetically
head(sort(featnames(dfmat)), 50)
#> [1] "-" "," ";" ":" "!"
#> [6] "?" "." "…" "'" "\""
#> [11] "(" ")" "[" "]" "/"
#> [16] "\\" "$" "1" "1,000" "100"
#> [21] "100,000,000" "108" "11" "120,000,000" "125"
#> [26] "13" "14th" "15th" "16" "1774"
#> [31] "1776" "1778" "1780" "1787" "1789"
#> [36] "1790" "1800" "1801" "1812" "1815"
#> [41] "1816" "1817" "1818" "1826" "1850"
#> [46] "1861" "1863" "1868" "1873" "1880"

# contrast with descending total frequency order from topfeatures()
names(topfeatures(dfmat, 50))
#> [1] "the" "of" "," "and" "."
#> [6] "to" "in" "a" "our" "we"
#> [11] "that" "be" "is" "it" "for"
#> [16] "by" "have" "which" "not" "with"
#> [21] "as" "will" "this" "i" "all"
#> [26] "are" "their" "but" "has" "people"
#> [31] "from" "its" ";" "government" "or"
#> [36] "on" "my" "us" "been" "can"
#> [41] "no" "they" "-" "so" "an"
#> [46] "who" "must" "upon" "at" "great"
```