Get the features from a document-feature matrix, which are stored as the column names of the dfm object.

featnames(x)

Arguments

x

the dfm whose features will be extracted

Value

character vector of the feature labels

Examples

dfmat <- dfm(data_corpus_inaugural)
#> Warning: 'dfm.corpus()' is deprecated. Use 'tokens()' first.
# first 50 features (in original text order)
head(featnames(dfmat), 50)
#>  [1] "fellow-citizens" "of" "the" "senate"
#>  [5] "and" "house" "representatives" ":"
#>  [9] "among" "vicissitudes" "incident" "to"
#> [13] "life" "no" "event" "could"
#> [17] "have" "filled" "me" "with"
#> [21] "greater" "anxieties" "than" "that"
#> [25] "which" "notification" "was" "transmitted"
#> [29] "by" "your" "order" ","
#> [33] "received" "on" "14th" "day"
#> [37] "present" "month" "." "one"
#> [41] "hand" "i" "summoned" "my"
#> [45] "country" "whose" "voice" "can"
#> [49] "never" "hear"
# first 50 features alphabetically
head(sort(featnames(dfmat)), 50)
#>  [1] "-" "," ";" ":" "!"
#>  [6] "?" "." "…" "'" "\""
#> [11] "(" ")" "[" "]" "/"
#> [16] "\\" "$" "1" "1,000" "100"
#> [21] "100,000,000" "108" "11" "120,000,000" "125"
#> [26] "13" "14th" "15th" "16" "1774"
#> [31] "1776" "1778" "1780" "1787" "1789"
#> [36] "1790" "1800" "1801" "1812" "1815"
#> [41] "1816" "1817" "1818" "1826" "1850"
#> [46] "1861" "1863" "1868" "1873" "1880"
# contrast with descending total frequency order from topfeatures()
names(topfeatures(dfmat, 50))
#>  [1] "the" "of" "," "and" "."
#>  [6] "to" "in" "a" "our" "we"
#> [11] "that" "be" "is" "it" "for"
#> [16] "by" "have" "which" "not" "with"
#> [21] "as" "will" "this" "i" "all"
#> [26] "are" "their" "but" "has" "people"
#> [31] "from" "its" ";" "government" "or"
#> [36] "on" "my" "us" "been" "can"
#> [41] "no" "they" "-" "so" "an"
#> [46] "who" "must" "upon" "at" "great"