Reads taxonomic information and associated data in tables, lists, and vectors and stores it in a taxmap() object.
Taxonomic classifications must be present.
parse_tax_data( tax_data, datasets = list(), class_cols = 1, class_sep = ";", sep_is_regex = FALSE, class_key = "taxon_name", class_regex = "(.*)", class_reversed = FALSE, include_match = TRUE, mappings = c(), include_tax_data = TRUE, named_by_rank = FALSE )
tax_data | A table, list, or vector that contains the names of taxa that represent
taxonomic classifications.
Accepted representations of classifications include: * A list/vector or table with column(s) of
taxon names: Something like |
---|---|
datasets | Additional lists/vectors/tables that should be included in the resulting |
class_cols | ( |
class_sep | ( |
sep_is_regex | ( |
class_key | ( |
class_regex | ( |
class_reversed | If |
include_match | ( |
mappings | (named |
include_tax_data | ( |
named_by_rank | ( |
Other parsers: extract_tax_data(), lookup_tax_data(), parse_edge_list()
# Read a vector of classifications my_taxa <- c("Mammalia;Carnivora;Felidae", "Mammalia;Carnivora;Felidae", "Mammalia;Carnivora;Ursidae") parse_tax_data(my_taxa, class_sep = ";")#> <Taxmap> #> 4 taxa: b. Mammalia, c. Carnivora, d. Felidae, e. Ursidae #> 4 edges: NA->b, b->c, c->d, c->e #> 1 data sets: #> tax_data: a named vector of 'character' with 3 items #> d. Mammalia;Carnivora;Felidae ... e. Mammalia;Carnivora;Ursidae #> 0 functions:# Read a list of classifications my_taxa <- list("Mammalia;Carnivora;Felidae", "Mammalia;Carnivora;Felidae", "Mammalia;Carnivora;Ursidae") parse_tax_data(my_taxa, class_sep = ";")#> <Taxmap> #> 4 taxa: b. Mammalia, c. Carnivora, d. Felidae, e. Ursidae #> 4 edges: NA->b, b->c, c->d, c->e #> 1 data sets: #> tax_data: a list of 3 items named by taxa: #> d, d, e #> 0 functions:# Read classifications in a table in a single column species_data <- data.frame(tax = c("Mammalia;Carnivora;Felidae", "Mammalia;Carnivora;Felidae", "Mammalia;Carnivora;Ursidae"), species_id = c("A", "B", "C")) parse_tax_data(species_data, class_sep = ";", class_cols = "tax")#> <Taxmap> #> 4 taxa: b. Mammalia, c. Carnivora, d. Felidae, e. Ursidae #> 4 edges: NA->b, b->c, c->d, c->e #> 1 data sets: #> tax_data: #> # A tibble: 3 x 3 #> taxon_id tax species_id #> <chr> <chr> <chr> #> 1 d Mammalia;Carnivora;Felidae A #> 2 d Mammalia;Carnivora;Felidae B #> 3 e Mammalia;Carnivora;Ursidae C #> 0 functions:# Read classifications in a table in multiple columns species_data <- data.frame(lineage = c("Mammalia;Carnivora;Felidae", "Mammalia;Carnivora;Felidae", "Mammalia;Carnivora;Ursidae"), species = c("Panthera leo", "Panthera tigris", "Ursus americanus"), species_id = c("A", "B", "C")) parse_tax_data(species_data, class_sep = c(" ", ";"), class_cols = c("lineage", "species"))#> <Taxmap> #> 9 taxa: b. Mammalia, c. Carnivora ... i. tigris, j. 
americanus #> 9 edges: NA->b, b->c, c->d, c->e, d->f, e->g, f->h, f->i, g->j #> 1 data sets: #> tax_data: #> # A tibble: 3 x 4 #> taxon_id lineage species species_id #> <chr> <chr> <chr> <chr> #> 1 h Mammalia;Carnivora;Felidae Panthera leo A #> 2 i Mammalia;Carnivora;Felidae Panthera tigris B #> 3 j Mammalia;Carnivora;Ursidae Ursus american… C #> 0 functions:# Read classification tables with one column per rank species_data <- data.frame(class = c("Mammalia", "Mammalia", "Mammalia"), order = c("Carnivora", "Carnivora", "Carnivora"), family = c("Felidae", "Felidae", "Ursidae"), genus = c("Panthera", "Panthera", "Ursus"), species = c("leo", "tigris", "americanus"), species_id = c("A", "B", "C")) parse_tax_data(species_data, class_cols = 1:5)#> <Taxmap> #> 9 taxa: b. Mammalia, c. Carnivora ... i. tigris, j. americanus #> 9 edges: NA->b, b->c, c->d, c->e, d->f, e->g, f->h, f->i, g->j #> 1 data sets: #> tax_data: #> # A tibble: 3 x 7 #> taxon_id class order family genus species species_id #> <chr> <chr> <chr> <chr> <chr> <chr> <chr> #> 1 h Mammalia Carnivo… Felid… Panthe… leo A #> 2 i Mammalia Carnivo… Felid… Panthe… tigris B #> 3 j Mammalia Carnivo… Ursid… Ursus american… C #> 0 functions:parse_tax_data(species_data, class_cols = 1:5, named_by_rank = TRUE) # makes `taxon_ranks()` work#> <Taxmap> #> 9 taxa: b. Mammalia, c. Carnivora ... i. tigris, j. 
americanus #> 9 edges: NA->b, b->c, c->d, c->e, d->f, e->g, f->h, f->i, g->j #> 1 data sets: #> tax_data: #> # A tibble: 3 x 7 #> taxon_id class order family genus species species_id #> <chr> <chr> <chr> <chr> <chr> <chr> <chr> #> 1 h Mammalia Carnivo… Felid… Panthe… leo A #> 2 i Mammalia Carnivo… Felid… Panthe… tigris B #> 3 j Mammalia Carnivo… Ursid… Ursus american… C #> 0 functions:# Classifications with extra information my_taxa <- c("Mammalia_class_1;Carnivora_order_2;Felidae_genus_3", "Mammalia_class_1;Carnivora_order_2;Felidae_genus_3", "Mammalia_class_1;Carnivora_order_2;Ursidae_genus_3") parse_tax_data(my_taxa, class_sep = ";", class_regex = "(.+)_(.+)_([0-9]+)", class_key = c(my_name = "taxon_name", a_rank = "taxon_rank", some_num = "info"))#> <Taxmap> #> 4 taxa: b. Mammalia, c. Carnivora, d. Felidae, e. Ursidae #> 4 edges: NA->b, b->c, c->d, c->e #> 2 data sets: #> tax_data: a named vector of 'character' with 3 items #> d. Mammalia_class_1[truncated] ... e. Mammalia_class_1[truncated] #> class_data: #> # A tibble: 9 x 6 #> taxon_id input_index my_name a_rank some_num regex_match #> <chr> <int> <chr> <chr> <chr> <chr> #> 1 b 1 Mammalia class 1 Mammalia_class… #> 2 c 1 Carnivora order 2 Carnivora_orde… #> 3 d 1 Felidae genus 3 Felidae_genus_3 #> # … with 6 more rows #> 0 functions:# --- Parsing multiple datasets at once (advanced) --- # The rest is one example for how to classify multiple datasets at once. 
# Make example data with taxonomic classifications species_data <- data.frame(tax = c("Mammalia;Carnivora;Felidae", "Mammalia;Carnivora;Felidae", "Mammalia;Carnivora;Ursidae"), species = c("Panthera leo", "Panthera tigris", "Ursus americanus"), species_id = c("A", "B", "C")) # Make example data associated with the taxonomic data # Note how this does not contain classifications, but # does have a variable in common with "species_data" ("id" = "species_id") abundance <- data.frame(id = c("A", "B", "C", "A", "B", "C"), sample_id = c(1, 1, 1, 2, 2, 2), counts = c(23, 4, 3, 34, 5, 13)) # Make another related data set named by species id common_names <- c(A = "Lion", B = "Tiger", C = "Bear", "Oh my!") # Make another related data set with no names foods <- list(c("ungulates", "boar"), c("ungulates", "boar"), c("salmon", "fruit", "nuts")) # Make a taxmap object with these three datasets x = parse_tax_data(species_data, datasets = list(counts = abundance, my_names = common_names, foods = foods), mappings = c("species_id" = "id", "species_id" = "{{name}}", "{{index}}" = "{{index}}"), class_cols = c("tax", "species"), class_sep = c(" ", ";")) # Note how all the datasets have taxon ids now x$data#> $tax_data #> # A tibble: 3 x 4 #> taxon_id tax species species_id #> <chr> <chr> <chr> <chr> #> 1 h Mammalia;Carnivora;Felidae Panthera leo A #> 2 i Mammalia;Carnivora;Felidae Panthera tigris B #> 3 j Mammalia;Carnivora;Ursidae Ursus americanus C #> #> $counts #> # A tibble: 6 x 4 #> taxon_id id sample_id counts #> <chr> <chr> <dbl> <dbl> #> 1 h A 1 23 #> 2 i B 1 4 #> 3 j C 1 3 #> 4 h A 2 34 #> 5 i B 2 5 #> 6 j C 2 13 #> #> $my_names #> h i j <NA> #> "Lion" "Tiger" "Bear" "Oh my!" 
#> #> $foods #> $foods$h #> [1] "ungulates" "boar" #> #> $foods$i #> [1] "ungulates" "boar" #> #> $foods$j #> [1] "salmon" "fruit" "nuts" #> #># This allows for complex mappings between variables that other functions use map_data(x, my_names, foods)#> $Lion #> [1] "ungulates" "boar" #> #> $Tiger #> [1] "ungulates" "boar" #> #> $Bear #> [1] "salmon" "fruit" "nuts" #> #> $`Oh my!` #> NULL #>#> 23 4 3 34 5 13 #> "Lion" "Tiger" "Bear" "Lion" "Tiger" "Bear"