Convert PubMed Central table nodes into a list of tibbles
Note
Saves the caption and footnotes as attributes, collapses multiline headers, expands all rowspan and colspan attributes, and adds subheadings to the first column.
Examples
# doc <- pmc_xml("PMC2231364")
doc <- xml2::read_xml(system.file("extdata/PMC2231364.xml",
package = "tidypmc"
))
x <- pmc_table(doc)
#> Parsing 4 tables
#> Adding footnotes to Table 1
sapply(x, dim)
#> Table 1 Table 2 Table 3 Table 4
#> [1,] 39 23 4 34
#> [2,] 5 5 4 4
x
#> $`Table 1`
#> # A tibble: 39 × 5
#> subheading Potential operon (r v…¹ Gene …² Putat…³ Refer…⁴
#> <chr> <chr> <chr> <chr> <chr>
#> 1 Iron uptake or heme synthesis yfeABCD operon* (r > 0… YPO243… Transp… yfeABC…
#> 2 Iron uptake or heme synthesis hmuRSTUV operon (r > 0… YPO027… Transp… hmuRST…
#> 3 Iron uptake or heme synthesis ysuJIHG* (r > 0.95) YPO152… Iron u… -
#> 4 Iron uptake or heme synthesis sufABCDS* (r > 0.90) YPO240… Iron-r… -
#> 5 Iron uptake or heme synthesis YPO1854-1856* (r > 0.9… YPO185… Iron u… -
#> 6 Sulfur metabolism tauABCD operon (r > 0.… YPO018… Transp… tauABC…
#> 7 Sulfur metabolism ssuEADCB operon (r > 0… YPO362… Sulphu… ssu op…
#> 8 Sulfur metabolism cys operon (r > 0.92) YPO301… Cystei… -
#> 9 Sulfur metabolism YPO1317-1319 (r > 0.97) YPO131… Sulfur… -
#> 10 Sulfur metabolism YPO4109-4111 (r > 0.90) YPO410… Sulfur… -
#> # … with 29 more rows, and abbreviated variable names
#> # ¹`Potential operon (r value)`, ²`Gene ID`,
#> # ³`Putative or predicted function`, ⁴`Reference (s)`
#>
#> $`Table 2`
#> # A tibble: 23 × 5
#> subheading `Gene locus` `Gene ID` Description refer…¹
#> <chr> <chr> <chr> <chr> <chr>
#> 1 Category A: Proven yfeABCD YPO2439-2442 Inorganic … [36]
#> 2 Category A: Proven yfuABC YPO2958-2960 Inorganic … [37]
#> 3 Category A: Proven ybt locus YPO1906-1916 Siderophor… [74]
#> 4 Category A: Proven hmuRSTUV YPO0279-0283 Heme trans… [38]
#> 5 Category A: Proven TonB-exbB-exbD YPO2193, YPO0682-0683 TonB-ExbB-… [75]
#> 6 Category A: Proven yiuABCR YPO1310-1313 Putative s… [76]
#> 7 Category A: Proven ysuFJIHG YPO1528-1532 Siderophor… [76]
#> 8 Category B: Putative fitABCD YPO4022-4025 Putative i… NA
#> 9 Category B: Putative Others YPO0778-0776 putative s… NA
#> 10 Category B: Putative NA YPO1011-1012 putative T… NA
#> # … with 13 more rows, and abbreviated variable name ¹reference
#>
#> $`Table 3`
#> # A tibble: 4 × 4
#> Cluster `Genes or operons for motif discovery` Stric…¹ Hits …²
#> <chr> <chr> <chr> <chr>
#> 1 Cluster I rps-rpm-rpl operon, rpsLG, rpsF-priB-rpsR-rplI, p… PurR-l… rps-rp…
#> 2 Cluster II hmuRSTUV, YPO0682, YPO0778, YPO0988, YPO1003, YPO… Fur-li… hmuRST…
#> 3 Cluster III cysB, ssuEADCB, cysK, YPO3541, YPO1517-YPO1516, Y… Fnr-li… ssuEAD…
#> 4 Cluster IV sdhCDAB-sucABCD, nuoA-N, cyoABCDE, purB, pta, kbl… Fnr/Cr… sdhCDA…
#> # … with abbreviated variable names
#> # ¹`Strict consensus of known TF-like box (See also Figure 4)`,
#> # ²`Hits of consensus`
#>
#> $`Table 4`
#> # A tibble: 34 × 4
#> subheading `Environmental perturbation` Description Refer…¹
#> <chr> <chr> <chr> <chr>
#> 1 Stimulon analysis Temperature shift NA [8, 9]
#> 2 Stimulon analysis Vegetative growth temperatures Shift from 26°C to … NA
#> 3 Stimulon analysis Heat shock Shift from 37°C to … NA
#> 4 Stimulon analysis Cold shock Shift from 37°C to … NA
#> 5 Stimulon analysis Osmotic stress NA [10]
#> 6 Stimulon analysis High osmolarity Treatment with 0.5 … NA
#> 7 Stimulon analysis High salinity Treatment with 0.5 … NA
#> 8 Stimulon analysis Oxidative stress Treatment with 1 mM… NA
#> 9 Stimulon analysis Mild acid stress Shift from pH7.2 to… NA
#> 10 Stimulon analysis Low Mg2+ Growth under 10 μM … [15]
#> # … with 24 more rows, and abbreviated variable name ¹`Reference (s)`
#>
attributes(x[[1]])
#> $names
#> [1] "subheading" "Potential operon (r value)"
#> [3] "Gene ID" "Putative or predicted function"
#> [5] "Reference (s)"
#>
#> $row.names
#> [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
#> [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39
#>
#> $class
#> [1] "tbl_df" "tbl" "data.frame"
#>
#> $caption
#> [1] "Stress-responsive operons in Y. pestis predicted from microarray expression data"
#>
#> $footnotes
#> [1] "'r' represents the correlation coefficient of adjacent genes; '*' represent the defined operon has the similar expression pattern in two other published microarray datasets [7, 21]; '?' inferred functions of uncharacterized genes; '-' means the corresponding operons have not been experimentally validated in other bacteria."
#>