Displays information about a corpus, including attributes and metadata such as the number of texts, date of creation, and source.

# S3 method for corpus
summary(object, n = 100, showmeta = FALSE,
  tolower = FALSE, ...)

Arguments

object

corpus to be summarized

n

maximum number of texts to describe, default=100

showmeta

set to TRUE to include document-level meta-data

tolower

convert texts to lower case before counting types

...

additional arguments passed through to tokenize

Examples

summary(data_corpus_inaugural)
#> Corpus consisting of 58 documents: #> #> Text Types Tokens Sentences Year President FirstName #> 1789-Washington 625 1538 23 1789 Washington George #> 1793-Washington 96 147 4 1793 Washington George #> 1797-Adams 826 2578 37 1797 Adams John #> 1801-Jefferson 717 1927 41 1801 Jefferson Thomas #> 1805-Jefferson 804 2381 45 1805 Jefferson Thomas #> 1809-Madison 535 1263 21 1809 Madison James #> 1813-Madison 541 1302 33 1813 Madison James #> 1817-Monroe 1040 3680 121 1817 Monroe James #> 1821-Monroe 1259 4886 129 1821 Monroe James #> 1825-Adams 1003 3152 74 1825 Adams John Quincy #> 1829-Jackson 517 1210 25 1829 Jackson Andrew #> 1833-Jackson 499 1269 29 1833 Jackson Andrew #> 1837-VanBuren 1315 4165 95 1837 Van Buren Martin #> 1841-Harrison 1896 9144 210 1841 Harrison William Henry #> 1845-Polk 1334 5193 153 1845 Polk James Knox #> 1849-Taylor 496 1179 22 1849 Taylor Zachary #> 1853-Pierce 1165 3641 104 1853 Pierce Franklin #> 1857-Buchanan 945 3086 89 1857 Buchanan James #> 1861-Lincoln 1075 4006 135 1861 Lincoln Abraham #> 1865-Lincoln 360 776 26 1865 Lincoln Abraham #> 1869-Grant 485 1235 40 1869 Grant Ulysses S. #> 1873-Grant 552 1475 43 1873 Grant Ulysses S. #> 1877-Hayes 831 2716 59 1877 Hayes Rutherford B. #> 1881-Garfield 1021 3212 111 1881 Garfield James A. #> 1885-Cleveland 676 1820 44 1885 Cleveland Grover #> 1889-Harrison 1352 4722 157 1889 Harrison Benjamin #> 1893-Cleveland 821 2125 58 1893 Cleveland Grover #> 1897-McKinley 1232 4361 130 1897 McKinley William #> 1901-McKinley 854 2437 100 1901 McKinley William #> 1905-Roosevelt 404 1079 33 1905 Roosevelt Theodore #> 1909-Taft 1437 5822 159 1909 Taft William Howard #> 1913-Wilson 658 1882 68 1913 Wilson Woodrow #> 1917-Wilson 549 1656 59 1917 Wilson Woodrow #> 1921-Harding 1169 3721 148 1921 Harding Warren G. #> 1925-Coolidge 1220 4440 196 1925 Coolidge Calvin #> 1929-Hoover 1090 3865 158 1929 Hoover Herbert #> 1933-Roosevelt 743 2062 85 1933 Roosevelt Franklin D. 
#> 1937-Roosevelt 725 1997 96 1937 Roosevelt Franklin D. #> 1941-Roosevelt 526 1544 68 1941 Roosevelt Franklin D. #> 1945-Roosevelt 275 647 26 1945 Roosevelt Franklin D. #> 1949-Truman 781 2513 116 1949 Truman Harry S. #> 1953-Eisenhower 900 2757 119 1953 Eisenhower Dwight D. #> 1957-Eisenhower 621 1931 92 1957 Eisenhower Dwight D. #> 1961-Kennedy 566 1566 52 1961 Kennedy John F. #> 1965-Johnson 568 1723 93 1965 Johnson Lyndon Baines #> 1969-Nixon 743 2437 103 1969 Nixon Richard Milhous #> 1973-Nixon 544 2012 68 1973 Nixon Richard Milhous #> 1977-Carter 527 1376 52 1977 Carter Jimmy #> 1981-Reagan 902 2790 128 1981 Reagan Ronald #> 1985-Reagan 925 2921 123 1985 Reagan Ronald #> 1989-Bush 795 2681 141 1989 Bush George #> 1993-Clinton 642 1833 81 1993 Clinton Bill #> 1997-Clinton 773 2449 111 1997 Clinton Bill #> 2001-Bush 621 1808 97 2001 Bush George W. #> 2005-Bush 773 2319 100 2005 Bush George W. #> 2009-Obama 938 2711 110 2009 Obama Barack #> 2013-Obama 814 2317 88 2013 Obama Barack #> 2017-Trump 582 1660 88 2017 Trump Donald J. #> #> Source: Gerhard Peters and John T. Woolley. The American Presidency Project. #> Created: Tue Jun 13 14:51:47 2017 #> Notes: http://www.presidency.ucsb.edu/inaugurals.php
summary(data_corpus_inaugural, n = 10)
#> Corpus consisting of 58 documents, showing 10 documents: #> #> Text Types Tokens Sentences Year President FirstName #> 1789-Washington 625 1538 23 1789 Washington George #> 1793-Washington 96 147 4 1793 Washington George #> 1797-Adams 826 2578 37 1797 Adams John #> 1801-Jefferson 717 1927 41 1801 Jefferson Thomas #> 1805-Jefferson 804 2381 45 1805 Jefferson Thomas #> 1809-Madison 535 1263 21 1809 Madison James #> 1813-Madison 541 1302 33 1813 Madison James #> 1817-Monroe 1040 3680 121 1817 Monroe James #> 1821-Monroe 1259 4886 129 1821 Monroe James #> 1825-Adams 1003 3152 74 1825 Adams John Quincy #> #> Source: Gerhard Peters and John T. Woolley. The American Presidency Project. #> Created: Tue Jun 13 14:51:47 2017 #> Notes: http://www.presidency.ucsb.edu/inaugurals.php
mycorpus <- corpus(data_char_ukimmig2010, docvars = data.frame(party=names(data_char_ukimmig2010)))
summary(mycorpus, showmeta=TRUE) # show the meta-data
#> Corpus consisting of 9 documents: #> #> Text Types Tokens Sentences party #> BNP 1125 3280 88 BNP #> Coalition 142 260 4 Coalition #> Conservative 251 499 15 Conservative #> Greens 322 679 21 Greens #> Labour 298 683 29 Labour #> LibDem 251 483 14 LibDem #> PC 77 114 5 PC #> SNP 88 134 4 SNP #> UKIP 346 723 27 UKIP #> #> Source: /Users/kbenoit/Dropbox (Personal)/GitHub/quanteda/docs/reference/* on x86_64 by kbenoit #> Created: Fri Oct 6 09:36:07 2017 #> Notes:
mysummary <- summary(mycorpus) # (quietly) assign the results
mysummary$Types / mysummary$Tokens # crude type-token ratio
#> [1] 0.3429878 0.5461538 0.5030060 0.4742268 0.4363104 0.5196687 0.6754386
#> [8] 0.6567164 0.4785615