Using quanteda’s fcm() and textplot_network(), you can perform visual analysis of social media posts in terms of co-occurrences of hashtags or usernames in a few steps. The dataset for this example contains only 10,000 Twitter posts, but you can easily analyse more than one million posts on your laptop computer.

Load sample data

# Restore the saved sample objects into the current workspace; the corpus
# `data_corpus_tweets` used in the next step is expected to come from this file.
load(file = "data/data_corpus_tweets.rda")

Construct a document-feature matrix of Twitter posts

# Attach quanteda before its first use: tokens(), dfm() and the dfm_*/fcm_*
# helpers called throughout this example are quanteda functions.
library("quanteda")

# Tokenise the tweets with punctuation removed, then build the
# document-feature matrix from the tokens. An intermediate object is used
# rather than `%>%` because nothing visible in this script attaches a package
# that exports the magrittr pipe.
tweet_toks <- tokens(data_corpus_tweets, remove_punct = TRUE)
tweet_dfm <- dfm(tweet_toks)

# Preview the first documents and features.
head(tweet_dfm)
## Document-feature matrix of: 6 documents, 42,398 features (99.97% sparse) and 34 docvars.
##         features
## docs     oggi pomeriggio a partire dalle 18.00 interverrò #pomeriggio5 su
##   tweet1    1          1 2       1     1     1          1            1  1
##   tweet2    0          0 1       0     0     0          0            0  0
##   tweet3    0          0 0       0     0     0          0            0  0
##   tweet4    0          0 0       0     0     0          0            0  0
##   tweet5    0          0 0       0     0     0          0            0  0
##   tweet6    0          0 0       0     0     0          0            0  0
##         features
## docs     #canale5
##   tweet1        1
##   tweet2        0
##   tweet3        0
##   tweet4        0
##   tweet5        0
##   tweet6        0
## [ reached max_nfeat ... 42,388 more features ]

Hashtags

Extract most common hashtags

# Keep only the features that start with "#" (glob pattern), i.e. hashtags.
tag_dfm <- dfm_select(
    tweet_dfm,
    pattern = "#*",
    selection = "keep"
)

# Names of the 50 hashtags ranked highest by topfeatures().
toptag <- names(topfeatures(tag_dfm, n = 50))
head(toptag)
## [1] "#ep2014"       "#salvini"      "#fdian"        "#ukip"        
## [5] "#caraacaratve" "#alzalatesta"

Construct a feature co-occurrence matrix of hashtags

# Attach the plotting add-on; textplot_network() is called further down.
library("quanteda.textplots")

# Count how often each pair of hashtags appears together in the same document.
tag_fcm <- fcm(tag_dfm, context = "document")
head(tag_fcm)
## Feature co-occurrence matrix of: 6 by 2,755 features.
##                  features
## features          #pomeriggio5 #canale5 #miaou #iovotoitaliano #fdian #bravo
##   #pomeriggio5               0        2      0               0      0      0
##   #canale5                   0        0      0               0      1      0
##   #miaou                     0        0      0               0      0      0
##   #iovotoitaliano            0        0      0               0     60      0
##   #fdian                     0        0      0               0      0      0
##   #bravo                     0        0      0               0      0      0
##                  features
## features          #primaveraeuropea #dpdabayrou #umps #pesticides
##   #pomeriggio5                    0           0     0           0
##   #canale5                        0           0     0           0
##   #miaou                          0           0     0           0
##   #iovotoitaliano                 0           0     0           0
##   #fdian                          0           0     0           0
##   #bravo                          0           0     0           0
## [ reached max_nfeat ... 2,745 more features ]
# Restrict the co-occurrence matrix to the most common hashtags (`toptag`) and
# draw the resulting network. The object is named `toptag_fcm` to match
# `toptag`; the earlier spelling `topgat_fcm` was a typo.
toptag_fcm <- fcm_select(tag_fcm, pattern = toptag)
textplot_network(
    toptag_fcm,
    min_freq = 0.1,
    edge_alpha = 0.8,
    edge_size = 5
)

Usernames

Extract most frequently mentioned usernames

# Keep only the features that start with "@" (glob pattern), i.e. usernames.
user_dfm <- dfm_select(
    tweet_dfm,
    pattern = "@*",
    selection = "keep"
)

# Names of the 50 usernames ranked highest by topfeatures().
topuser <- names(topfeatures(user_dfm, n = 50))
head(topuser)
## [1] "@pablo_iglesias_" "@elenavalenciano" "@canete2014_"     "@nigel_farage"   
## [5] "@martinschulz"    "@giorgiameloni"

Construct a feature co-occurrence matrix of usernames

# Count how often each pair of usernames appears together in the same document.
user_fcm <- fcm(user_dfm, context = "document")
head(user_fcm)
## Feature co-occurrence matrix of: 6 by 5,951 features.
##                   features
## features           @pacomarhuenda @pablo_iglesias_ @kopriths @gapatzhs
##   @pacomarhuenda                0                1         0         0
##   @pablo_iglesias_              0                0         0         0
##   @kopriths                     0                0         0         1
##   @gapatzhs                     0                0         0         0
##   @mariaspyraki                 0                0         0         0
##   @ernesturtasun                0                0         0         0
##                   features
## features           @mariaspyraki @ernesturtasun @gabrielamard @nigel_farage
##   @pacomarhuenda               0              0             0             0
##   @pablo_iglesias_             0              1             0             0
##   @kopriths                    1              0             0             0
##   @gapatzhs                    1              0             0             0
##   @mariaspyraki                0              0             0             0
##   @ernesturtasun               0              0             0             0
##                   features
## features           @ukip @youtube
##   @pacomarhuenda       0        0
##   @pablo_iglesias_     0        0
##   @kopriths            0        0
##   @gapatzhs            0        0
##   @mariaspyraki        0        0
##   @ernesturtasun       0        0
## [ reached max_nfeat ... 5,941 more features ]
# Narrow the co-occurrence matrix to the most frequently mentioned usernames
# (overwriting `user_fcm` in place), then plot the network with orange edges.
user_fcm <- fcm_select(user_fcm, pattern = topuser, selection = "keep")
textplot_network(
    user_fcm,
    min_freq = 0.1,
    edge_color = "orange",
    edge_alpha = 0.8,
    edge_size = 5
)