
Package index
-
TextReuseCorpus() is.TextReuseCorpus() skipped() - TextReuseCorpus
-
tokens() `tokens<-`() hashes() `hashes<-`() minhashes() `minhashes<-`() - Accessors for TextReuse objects
-
TextReuseTextDocument() is.TextReuseTextDocument() has_content() has_tokens() has_hashes() has_minhashes() - TextReuseTextDocument
-
align_local() - Local alignment of natural language texts
-
as.matrix(<textreuse_candidates>) - Convert candidates data frames to other formats
-
filenames() - Filenames from paths
-
hash_string() - Hash a string to an integer
-
lsh() - Locality sensitive hashing for minhash
-
lsh_candidates() - Candidate pairs from LSH comparisons
-
lsh_compare() - Compare candidates identified by LSH
-
lsh_probability() lsh_threshold() - Probability that a candidate pair will be detected with LSH
-
lsh_query() - Query an LSH cache for matches to a single document
-
lsh_subset() - List of all candidates in a corpus
-
minhash_generator() - Generate a minhash function
-
pairwise_candidates() - Candidate pairs from pairwise comparisons
-
pairwise_compare() - Pairwise comparisons among documents in a corpus
-
rehash() - Recompute the hashes for a document or corpus
-
jaccard_similarity() jaccard_dissimilarity() jaccard_bag_similarity() ratio_of_matches() - Measure similarity/dissimilarity in documents
-
textreuse textreuse-package - textreuse: Detect Text Reuse and Document Similarity
-
tokenize() - Recompute the tokens for a document or corpus
-
tokenize_words() tokenize_sentences() tokenize_ngrams() tokenize_skip_ngrams() - Split texts into tokens
-
wordcount() - Count words