
Package index
- 
          
TextReuseCorpus() is.TextReuseCorpus() skipped() - TextReuseCorpus
 
- 
          
tokens() `tokens<-`() hashes() `hashes<-`() minhashes() `minhashes<-`() - Accessors for TextReuse objects
 
- 
          
TextReuseTextDocument() is.TextReuseTextDocument() has_content() has_tokens() has_hashes() has_minhashes() - TextReuseTextDocument
 
- 
          
align_local() - Local alignment of natural language texts
 
- 
          
as.matrix(<textreuse_candidates>) - Convert candidates data frames to other formats
 
- 
          
filenames() - Filenames from paths
 
- 
          
hash_string() - Hash a string to an integer
 
- 
          
lsh() - Locality sensitive hashing for minhash
 
- 
          
lsh_candidates() - Candidate pairs from LSH comparisons
 
- 
          
lsh_compare() - Compare candidates identified by LSH
 
- 
          
lsh_probability() lsh_threshold() - Probability that a candidate pair will be detected with LSH
 
- 
          
lsh_query() - Query an LSH cache for matches to a single document
 
- 
          
lsh_subset() - List of all candidates in a corpus
 
- 
          
minhash_generator() - Generate a minhash function
 
- 
          
pairwise_candidates() - Candidate pairs from pairwise comparisons
 
- 
          
pairwise_compare() - Pairwise comparisons among documents in a corpus
 
- 
          
rehash() - Recompute the hashes for a document or corpus
 
- 
          
jaccard_similarity() jaccard_dissimilarity() jaccard_bag_similarity() ratio_of_matches() - Measure similarity/dissimilarity in documents
 
- 
          
textreuse textreuse-package - textreuse: Detect Text Reuse and Document Similarity
 
- 
          
tokenize() - Recompute the tokens for a document or corpus
 
- 
          
tokenize_words() tokenize_sentences() tokenize_ngrams() tokenize_skip_ngrams() - Split texts into tokens
 
- 
          
wordcount() - Count words