This function helps to define a subset of ngram files which should be imported, since importing all ngrams at once can be very expensive (in terms of CPU and memory).
Value
A list of zip-locations which can be read via jst_get_ngram().
Examples
# create sample output: import the example archive and write the result
# as a csv file into the session's temporary directory
tmp <- tempdir()
jst_import_zip(jst_example("pseudo_dfr.zip"),
               import_spec = jst_define_import(book = jst_get_book),
               out_file = "test", out_path = tmp)
#> Processing files for book_chapter with functions jst_get_book
# re-import as our selection for which we would like to import ngrams
# (the path is stored once so the same file can be removed at the end)
out_csv <- file.path(tmp, "test_book_chapter_jst_get_book-1.csv")
selection <- jst_re_import(out_csv)
# get location of file
zip_loc <- jst_subset_ngrams(jst_example("pseudo_dfr.zip"), "ngram1",
                             selection)
# import ngram
jst_get_ngram(zip_loc[[1]])
#> # A tibble: 2 × 3
#> file_name ngram n
#> <chr> <chr> <int>
#> 1 book-chapter-standard_book Common 400
#> 2 book-chapter-standard_book Uncommon 5
# clean up: remove only the csv file re-imported above. `unlink()` on a
# directory without `recursive = TRUE` deletes nothing, and the session's
# temporary directory itself must stay in place for the rest of the session.
unlink(out_csv)