Get or set global options affecting functions across quanteda.
quanteda_options(..., reset = FALSE, initialize = FALSE)
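Argument | Description
---|---
`...` | options to be set, as key-value pairs, same as `options()`; this may also be a list of valid key-value pairs, which is useful for setting a group of options at once (see the examples)
`reset` | logical; if `TRUE`, reset all quanteda options to their default values
`initialize` | logical; if `TRUE`, set only those quanteda options that are not already defined, leaving any existing values unchanged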
When called using a `key = value` pair (where `key` can be a label or a quoted character name), the option is set and `TRUE` is returned invisibly.
When called with no arguments, a named list of the package options is returned.
When called with `reset = TRUE` as an argument, all options are reset to their default values, and `TRUE` is returned invisibly.
Currently available options are:
verbose
logical; if `TRUE`, then use this as the default for all functions with a `verbose` argument
threads
integer; specifies the number of threads to use in parallelized functions; defaults to `RcppParallel::defaultNumThreads()`; the number of threads can be changed only once in a session
print_dfm_max_ndoc
integer; specifies the number of documents to display when using the defaults for printing a dfm
print_dfm_max_nfeat
integer; specifies the number of features to display when using the defaults for printing a dfm
base_docname
character; stem name for documents that are unnamed when a corpus, tokens, or dfm are created or when a dfm is converted from another object
base_featname
character; stem name for features that are unnamed when they are added, for whatever reason, to a dfm through an operation that adds features
base_compname
character; stem name for components that are created by matrix factorization
language_stemmer
character; language option for `char_wordstem()`, `tokens_wordstem()`, and `dfm_wordstem()`
pattern_hashtag, pattern_username
character; regex patterns for (social media) hashtags and usernames respectively, used to avoid segmenting these in the default internal "word" tokenizer
tokens_block_size
integer; specifies the number of documents to be tokenized at a time in blocked tokenization. When the number is large, tokenization becomes faster but also memory-intensive.
tokens_locale
character; specifies the locale used by stringi boundary detection in tokenization and corpus reshaping. See `stringi::stri_opts_brkiter()`.
(opt <- quanteda_options())
#> $threads
#> [1] 12
#>
#> $verbose
#> [1] FALSE
#>
#> $print_dfm_max_ndoc
#> [1] 6
#>
#> $print_dfm_max_nfeat
#> [1] 10
#>
#> $print_dfm_summary
#> [1] TRUE
#>
#> $print_corpus_max_ndoc
#> [1] 6
#>
#> $print_corpus_max_nchar
#> [1] 60
#>
#> $print_corpus_summary
#> [1] TRUE
#>
#> $print_tokens_max_ndoc
#> [1] 6
#>
#> $print_tokens_max_ntoken
#> [1] 12
#>
#> $print_tokens_summary
#> [1] TRUE
#>
#> $print_dictionary_max_nkey
#> [1] 6
#>
#> $print_dictionary_max_nval
#> [1] 20
#>
#> $print_dictionary_summary
#> [1] TRUE
#>
#> $print_kwic_max_nrow
#> [1] 1000
#>
#> $print_kwic_summary
#> [1] TRUE
#>
#> $base_docname
#> [1] "text"
#>
#> $base_featname
#> [1] "feat"
#>
#> $base_compname
#> [1] "comp"
#>
#> $language_stemmer
#> [1] "english"
#>
#> $pattern_hashtag
#> [1] "#\\w+#?"
#>
#> $pattern_username
#> [1] "@[a-zA-Z0-9_]+"
#>
#> $tokens_block_size
#> [1] 10000
#>
#> $tokens_locale
#> [1] "en_US@ss=standard"
#>
# \donttest{
quanteda_options(verbose = TRUE)
quanteda_options("verbose" = FALSE)
quanteda_options("threads")
#> [1] 12
quanteda_options(print_dfm_max_ndoc = 50L)
# reset to defaults
quanteda_options(reset = TRUE)
# reset to saved options
quanteda_options(opt)
# }