Displays information about a corpus, including attributes and metadata such as the number of texts, date of creation, and source.

# S3 method for corpus
summary(object, n = 100, tolower = FALSE, showmeta = TRUE, ...)

Arguments

object

corpus to be summarized

n

maximum number of texts to describe, default=100

tolower

convert texts to lower case before counting types

showmeta

set to TRUE to include document-level meta-data

...

additional arguments passed through to tokens()

Examples

summary(data_corpus_inaugural)
#> Corpus consisting of 59 documents, showing 59 documents:
#> 
#>             Text Types Tokens Sentences Year  President       FirstName
#>  1789-Washington   625   1537        23 1789 Washington          George
#>  1793-Washington    96    147         4 1793 Washington          George
#>       1797-Adams   826   2577        37 1797      Adams            John
#>   1801-Jefferson   717   1923        41 1801  Jefferson          Thomas
#>   1805-Jefferson   804   2380        45 1805  Jefferson          Thomas
#>     1809-Madison   535   1261        21 1809    Madison           James
#>     1813-Madison   541   1302        33 1813    Madison           James
#>      1817-Monroe  1040   3677       121 1817     Monroe           James
#>      1821-Monroe  1259   4886       131 1821     Monroe           James
#>       1825-Adams  1003   3147        74 1825      Adams     John Quincy
#>     1829-Jackson   517   1208        25 1829    Jackson          Andrew
#>     1833-Jackson   499   1267        29 1833    Jackson          Andrew
#>    1837-VanBuren  1315   4158        95 1837  Van Buren          Martin
#>    1841-Harrison  1896   9125       210 1841   Harrison   William Henry
#>        1845-Polk  1334   5186       153 1845       Polk      James Knox
#>      1849-Taylor   496   1178        22 1849     Taylor         Zachary
#>      1853-Pierce  1165   3636       104 1853     Pierce        Franklin
#>    1857-Buchanan   945   3083        89 1857   Buchanan           James
#>     1861-Lincoln  1075   3999       135 1861    Lincoln         Abraham
#>     1865-Lincoln   360    775        26 1865    Lincoln         Abraham
#>       1869-Grant   485   1229        40 1869      Grant      Ulysses S.
#>       1873-Grant   552   1472        43 1873      Grant      Ulysses S.
#>       1877-Hayes   831   2707        59 1877      Hayes   Rutherford B.
#>    1881-Garfield  1021   3209       111 1881   Garfield        James A.
#>   1885-Cleveland   676   1816        44 1885  Cleveland          Grover
#>    1889-Harrison  1352   4721       157 1889   Harrison        Benjamin
#>   1893-Cleveland   821   2125        58 1893  Cleveland          Grover
#>    1897-McKinley  1232   4353       130 1897   McKinley         William
#>    1901-McKinley   854   2437       100 1901   McKinley         William
#>   1905-Roosevelt   404   1079        33 1905  Roosevelt        Theodore
#>        1909-Taft  1437   5821       158 1909       Taft  William Howard
#>      1913-Wilson   658   1882        68 1913     Wilson         Woodrow
#>      1917-Wilson   549   1652        59 1917     Wilson         Woodrow
#>     1921-Harding  1169   3719       148 1921    Harding       Warren G.
#>    1925-Coolidge  1220   4440       196 1925   Coolidge          Calvin
#>      1929-Hoover  1090   3860       158 1929     Hoover         Herbert
#>   1933-Roosevelt   743   2057        85 1933  Roosevelt     Franklin D.
#>   1937-Roosevelt   725   1989        96 1937  Roosevelt     Franklin D.
#>   1941-Roosevelt   526   1519        68 1941  Roosevelt     Franklin D.
#>   1945-Roosevelt   275    633        27 1945  Roosevelt     Franklin D.
#>      1949-Truman   781   2504       116 1949     Truman        Harry S.
#>  1953-Eisenhower   900   2743       119 1953 Eisenhower       Dwight D.
#>  1957-Eisenhower   621   1907        92 1957 Eisenhower       Dwight D.
#>     1961-Kennedy   566   1541        52 1961    Kennedy         John F.
#>     1965-Johnson   568   1710        93 1965    Johnson   Lyndon Baines
#>       1969-Nixon   743   2416       103 1969      Nixon Richard Milhous
#>       1973-Nixon   544   1995        68 1973      Nixon Richard Milhous
#>      1977-Carter   527   1370        52 1977     Carter           Jimmy
#>      1981-Reagan   902   2781       129 1981     Reagan          Ronald
#>      1985-Reagan   925   2909       123 1985     Reagan          Ronald
#>        1989-Bush   795   2674       141 1989       Bush          George
#>     1993-Clinton   642   1833        81 1993    Clinton            Bill
#>     1997-Clinton   773   2436       111 1997    Clinton            Bill
#>        2001-Bush   621   1806        97 2001       Bush       George W.
#>        2005-Bush   772   2312        99 2005       Bush       George W.
#>       2009-Obama   938   2689       110 2009      Obama          Barack
#>       2013-Obama   814   2317        88 2013      Obama          Barack
#>       2017-Trump   582   1660        88 2017      Trump       Donald J.
#>       2021-Biden   812   2766       216 2021      Biden       Joseph R.
#>                  Party
#>                   none
#>                   none
#>             Federalist
#>  Democratic-Republican
#>  Democratic-Republican
#>  Democratic-Republican
#>  Democratic-Republican
#>  Democratic-Republican
#>  Democratic-Republican
#>  Democratic-Republican
#>             Democratic
#>             Democratic
#>             Democratic
#>                   Whig
#>                   Whig
#>                   Whig
#>             Democratic
#>             Democratic
#>             Republican
#>             Republican
#>             Republican
#>             Republican
#>             Republican
#>             Republican
#>             Democratic
#>             Republican
#>             Democratic
#>             Republican
#>             Republican
#>             Republican
#>             Republican
#>             Democratic
#>             Democratic
#>             Republican
#>             Republican
#>             Republican
#>             Democratic
#>             Democratic
#>             Democratic
#>             Democratic
#>             Democratic
#>             Republican
#>             Republican
#>             Democratic
#>             Democratic
#>             Republican
#>             Republican
#>             Democratic
#>             Republican
#>             Republican
#>             Republican
#>             Democratic
#>             Democratic
#>             Republican
#>             Republican
#>             Democratic
#>             Democratic
#>             Republican
#>             Democratic
#> 
summary(data_corpus_inaugural, n = 10)
#> Corpus consisting of 59 documents, showing 10 documents:
#> 
#>             Text Types Tokens Sentences Year  President   FirstName
#>  1789-Washington   625   1537        23 1789 Washington      George
#>  1793-Washington    96    147         4 1793 Washington      George
#>       1797-Adams   826   2577        37 1797      Adams        John
#>   1801-Jefferson   717   1923        41 1801  Jefferson      Thomas
#>   1805-Jefferson   804   2380        45 1805  Jefferson      Thomas
#>     1809-Madison   535   1261        21 1809    Madison       James
#>     1813-Madison   541   1302        33 1813    Madison       James
#>      1817-Monroe  1040   3677       121 1817     Monroe       James
#>      1821-Monroe  1259   4886       131 1821     Monroe       James
#>       1825-Adams  1003   3147        74 1825      Adams John Quincy
#>                  Party
#>                   none
#>                   none
#>             Federalist
#>  Democratic-Republican
#>  Democratic-Republican
#>  Democratic-Republican
#>  Democratic-Republican
#>  Democratic-Republican
#>  Democratic-Republican
#>  Democratic-Republican
#> 
corp <- corpus(data_char_ukimmig2010,
               docvars = data.frame(party=names(data_char_ukimmig2010)))
summary(corp, showmeta = TRUE) # show the meta-data
#> Corpus consisting of 9 documents, showing 9 documents:
#> 
#>          Text Types Tokens Sentences        party
#>           BNP  1125   3280        88          BNP
#>     Coalition   142    260         4    Coalition
#>  Conservative   251    499        15 Conservative
#>        Greens   322    679        21       Greens
#>        Labour   298    683        29       Labour
#>        LibDem   251    483        14       LibDem
#>            PC    77    114         5           PC
#>           SNP    88    134         4          SNP
#>          UKIP   346    723        26         UKIP
#> 
sumcorp <- summary(corp) # (quietly) assign the results
sumcorp$Types / sumcorp$Tokens # crude type-token ratio
#> [1] 0.3429878 0.5461538 0.5030060 0.4742268 0.4363104 0.5196687 0.6754386
#> [8] 0.6567164 0.4785615