Introduction to the wikitaxa package
Scott Chamberlain
wikitaxa - Taxonomy data from Wikipedia
The goal of wikitaxa
is to allow search and taxonomic
data retrieval from across many Wikimedia sites, including: Wikipedia,
Wikicommons, and Wikispecies.
There are lower level and higher level parts to the package API:
Low level API
The low-level API is meant for power users: it gives you more control, but requires more knowledge.
CRAN version
install.packages("wikitaxa")
Dev version
remotes::install_github("ropensci/wikitaxa")
library("wikitaxa")
Wikidata
z <- wt_data("Poa annua")
#> [1] "labels" "descriptions" "aliases" "sitelinks" "claims"
#> language value
#> 1 pt Poa annua
#> 2 is Varpasveifgras
#> 3 pl Wiechlina roczna
#> 4 fr Pâturin annuel
#> 5 es Poa annua
#> 6 en Poa annua
Get a Wikidata ID
wt_data_id("Mimulus foliatus")
#> [1] "Q6495130"
#> attr(,"class")
#> [1] "wiki_id"
lower level
pg <- wt_wiki_page("https://en.wikipedia.org/wiki/Malus_domestica")
res <- wt_wiki_page_parse(pg)
#> [1] "https://commons.wikimedia.org/wiki/Category:Apples"
#> [2] "https://commons.wikimedia.org/wiki/Category:Apple_cultivars"
#> [3] "https://www.wikidata.org/wiki/Q158657"
#> [4] "https://www.wikidata.org/wiki/Q18674606"
#> [5] "https://species.wikimedia.org/wiki/Malus_pumila"
#> [6] "https://species.wikimedia.org/wiki/Malus_domestica"
higher level
res <- wt_wikipedia("Malus domestica")
#> # A tibble: 1 x 2
#> name language
#> <chr> <chr>
#> 1 Apple en
#> # A tibble: 3 x 2
#> rank name
#> <chr> <chr>
#> 1 plainlinks ""
#> 2 binomial "Malus domestica"
#> 3 <NA> ""
choose a Wikipedia language
# French
wt_wikipedia(name = "Malus domestica", wiki = "fr")
#> $langlinks
#> # A tibble: 60 x 5
#> lang url langname autonym `*`
#> <chr> <chr> <chr> <chr> <chr>
#> 1 als https://als.wikipedia.org/wiki/Kultu… Alemannis… Alemannis… Kulturapf…
#> 2 am https://am.wikipedia.org/wiki/%E1%89… amharique አማርኛ ቱፋሕ
#> 3 ast https://ast.wikipedia.org/wiki/Malus… asturien asturianu Malus dom…
#> 4 az https://az.wikipedia.org/wiki/M%C9%9… azéri azərbayca… Mədəni al…
#> 5 bat-s… https://bat-smg.wikipedia.org/wiki/V… Samogitian žemaitėška Vuobelės
#> 6 bg https://bg.wikipedia.org/wiki/%D0%94… bulgare български Домашна я…
#> 7 bpy https://bpy.wikipedia.org/wiki/%E0%A… bishnupri… বিষ্ণুপ্র… আপেল
#> 8 ca https://ca.wikipedia.org/wiki/Pomera… catalan català Pomera co…
#> 9 cs https://cs.wikipedia.org/wiki/Jablo%… tchèque čeština Jabloň do…
#> 10 csb https://csb.wikipedia.org/wiki/Dom%C… kachoube kaszëbsczi Domôcô ja…
#> # … with 50 more rows
#> $externallinks
#> [1] "http://www.cabi-publishing.org/pdf/Books/0851995926/0851995926_Chap01.pdf"
#> [2] "http://www.umass.edu/fruitadvisor/fruitnotes/ontheorigin.pdf"
#> [3] "http://www.applegenome.org"
#> [4] "http://societeradio-canada.info/emissions/les_annees_lumiere/2010-2011/"
#> [5] "http://www.nature.com/ng/journal/vaop/ncurrent/full/ng.654.html"
#> [6] "https://gallica.bnf.fr/ark:/12148/bpt6k28582v"
#> [7] "http://worldcat.org/issn/1471-2229&lang=fr"
#> [8] "https://www.ncbi.nlm.nih.gov/pubmed/26924309"
#> [9] "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4770685"
#> [10] "https://dx.doi.org/10.1186%2Fs12870-016-0739-y"
#> [11] "https://doi.org/10.1186/s12870-016-0739-y"
#> [12] "http://www.hamblenne.be/LISTE_RGF.pdf"
#> [13] "http://www.arcticapples.com/blog/john/demystifying-arctic-apples#.UaeX-Jzjmw5"
#> [14] "http://www.cctec.cornell.edu/plants/GENEVA-Apple-Rootstocks-Comparison-Chart-120911.pdf"
#> [15] "https://commons.wikimedia.org/wiki/Category:Malus_domestica?uselang=fr"
#> [16] "http://www.tela-botanica.org/page:eflore"
#> [17] "http://www.tela-botanica.org/bdtfx-nn-40744"
#> [18] "http://www.cbif.gc.ca/acp/fra/siti/regarder?tsn=516655"
#> [19] "http://www.itis.gov/servlet/SingleRpt/SingleRpt?search_topic=TSN&search_value=516655"
#> [20] "http://www.cbif.gc.ca/acp/fra/siti/regarder?tsn=25262"
#> [21] "http://www.itis.gov/servlet/SingleRpt/SingleRpt?search_topic=TSN&search_value=25262"
#> [22] "http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?lin=s&p=has_linkout&id=3750"
#> [23] "http://www.ars-grin.gov/"
#> [24] "https://npgsweb.ars-grin.gov/gringlobal/taxonomydetail.aspx?104681"
#> [25] "https://www.biolib.cz/en/"
#> [26] "https://www.biolib.cz/en/taxon/id39552/"
#> [27] "http://inpn.mnhn.fr/isb/espece/cd_nom/107207"
#> [28] "http://www.bmlisieux.com/normandie/roblet.htm"
#> [29] "http://site.voila.fr/babadubonsai/docum/docmal.html"
#> [30] "http://www.fruitiers.net"
#> [31] "http://www.inra.fr/hyppz/CULTURES/3c---003.htm"
#> [32] "http://cat.inist.fr/?aModele=afficheN&cpsidt=15506238"
#> [33] "http://www.omafra.gov.on.ca/french/crops/facts/98-014.htm"
#> [34] "http://www.gardenaction.co.uk/fruit_veg_diary/fruit_veg_mini_project_september_2_apple.asp"
#> $common_names
#> # A tibble: 1 x 2
#> name language
#> <chr> <chr>
#> 1 Pommier domestique fr
#> $classification
#> # A tibble: 0 x 0
#> $synonyms
#> list()
# Slovak
wt_wikipedia(name = "Malus domestica", wiki = "sk")
#> $langlinks
#> # A tibble: 60 x 5
#> lang url langname autonym `*`
#> <chr> <chr> <chr> <chr> <chr>
#> 1 als https://als.wikipedia.org/wiki/K… Alemannisch Alemannis… Kulturap…
#> 2 am https://am.wikipedia.org/wiki/%E… amharčina አማርኛ ቱፋሕ
#> 3 ast https://ast.wikipedia.org/wiki/M… astúrčina asturianu Malus do…
#> 4 az https://az.wikipedia.org/wiki/M%… azerbajdžančina azərbayca… Mədəni a…
#> 5 bat-s… https://bat-smg.wikipedia.org/wi… Samogitian žemaitėška Vuobelės
#> 6 bg https://bg.wikipedia.org/wiki/%D… bulharčina български Домашна …
#> 7 bpy https://bpy.wikipedia.org/wiki/%… bišnuprijskoma… বিষ্ণুপ্র… আপেল
#> 8 ca https://ca.wikipedia.org/wiki/Po… katalánčina català Pomera c…
#> 9 cs https://cs.wikipedia.org/wiki/Ja… čeština čeština Jabloň d…
#> 10 csb https://csb.wikipedia.org/wiki/D… kašubčina kaszëbsczi Domôcô j…
#> # … with 50 more rows
#> $externallinks
#> list()
#> $common_names
#> # A tibble: 1 x 2
#> name language
#> <chr> <chr>
#> 1 Jabloň domáca sk
#> $classification
#> # A tibble: 0 x 0
#> $synonyms
#> list()
# Vietnamese
wt_wikipedia(name = "Malus domestica", wiki = "vi")
#> $langlinks
#> # A tibble: 60 x 5
#> lang url langname autonym `*`
#> <chr> <chr> <chr> <chr> <chr>
#> 1 als https://als.wikipedia.org/wiki/Kul… Alemannisch Alemanni… Kulturapf…
#> 2 am https://am.wikipedia.org/wiki/%E1%… Tiếng Amha… አማርኛ ቱፋሕ
#> 3 ast https://ast.wikipedia.org/wiki/Mal… Tiếng Astu… asturianu Malus dom…
#> 4 az https://az.wikipedia.org/wiki/M%C9… Tiếng Azer… azərbayc… Mədəni al…
#> 5 zh-min-… https://zh-min-nan.wikipedia.org/w… Chinese (M… Bân-lâm-… Phōng-kó-…
#> 6 bg https://bg.wikipedia.org/wiki/%D0%… Tiếng Bulg… български Домашна я…
#> 7 ca https://ca.wikipedia.org/wiki/Pome… Tiếng Cata… català Pomera co…
#> 8 cs https://cs.wikipedia.org/wiki/Jabl… Tiếng Séc čeština Jabloň do…
#> 9 da https://da.wikipedia.org/wiki/Almi… Tiếng Đan … dansk Almindeli…
#> 10 de https://de.wikipedia.org/wiki/Kult… Tiếng Đức Deutsch Kulturapf…
#> # … with 50 more rows
#> $externallinks
#> [1] "http://biology.umaine.edu/Amelanchier/Rosaceae_2007.pdf"
#> [2] "//dx.doi.org/10.1007%2Fs00606-007-0539-9"
#> [3] "https://npgsweb.ars-grin.gov/gringlobal/taxonomydetail.aspx?410495"
#> [4] "https://npgsweb.ars-grin.gov/gringlobal/taxonomydetail.aspx?30530"
#> [5] "http://www.uga.edu/fruit/apple.html"
#> [6] "//dx.doi.org/10.3732%2Fajb.93.3.357"
#> [7] "http://www.plosgenetics.org/article/info:doi%2F10.1371%2Fjournal.pgen.1002703"
#> [8] "//www.ncbi.nlm.nih.gov/pmc/articles/PMC3349737"
#> [9] "//www.ncbi.nlm.nih.gov/pubmed/22589740"
#> [10] "//dx.doi.org/10.1371%2Fjournal.pgen.1002703"
#> [11] "http://news.sciencemag.org/sciencenow/2012/05/scienceshot-the-secret-history-o.html"
#> [12] "http://www.plantpress.com/wildlife/o523-apple.php"
#> [13] "http://cahnrsnews.wsu.edu/2010/08/29/apple-cup-rivals-contribute-to-apple-genome-sequencing/"
#> [14] "http://www.nature.com/ng/journal/v42/n10/full/ng.654.html"
#> [15] "http://www.alphagalileo.org/ViewItem.aspx?ItemId=83717&CultureCode=en"
#> [16] "http://www.ornl.gov/sci/techresources/Human_Genome/project/info.shtml"
#> [17] "https://commons.wikimedia.org/wiki/Apple?uselang=vi"
#> [18] "https://commons.wikimedia.org/wiki/Category:Malus_domestica?uselang=vi"
#> [19] "http://bachkhoatoanthu.vass.gov.vn/noidung/tudien/Lists/GiaiNghia/View_Detail.aspx?ItemID=5007"
#> [20] "http://www.eol.org/pages/629094"
#> [21] "http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=3750"
#> [22] "http://www.itis.gov/servlet/SingleRpt/SingleRpt?search_topic=TSN&search_value=516655"
#> [23] "http://www.catalogueoflife.org/col/details/species/id/19538828/synonym/19539435"
#> [24] "https://www.biolib.cz/cz/taxon/id39552"
#> [25] "https://gd.eppo.int/taxon/MABSD"
#> [26] "https://npgsweb.ars-grin.gov/gringlobal/taxonomydetail.aspx?id=104681"
#> [27] "http://www.ipni.org/ipni/idPlantNameSearch.do?id=726282-1"
#> [28] "https://www.itis.gov/servlet/SingleRpt/SingleRpt?search_topic=TSN&search_value=516655"
#> [29] "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=3750"
#> [30] "http://www.nzor.org.nz/names/14d024a2-d821-48e3-95d8-f0dd206c70a0"
#> [31] "http://www.pfaf.org/user/Plant.aspx?LatinName=Malus+domestica"
#> [32] "http://www.theplantlist.org/tpl1.1/record/rjp-454"
#> [33] "http://www.plantsoftheworldonline.org/taxon/urn:lsid:ipni.org:names:726282-1"
#> [34] "http://legacy.tropicos.org/Name/27804420"
#> [35] "https://vicflora.rbg.vic.gov.au/flora/taxon/e41b929d-b709-4f4c-8dbe-2a9241e2342b"
#> [36] "http://www.ipni.org/ipni/idPlantNameSearch.do?id=60476301-2"
#> [37] "http://www.plantsoftheworldonline.org/taxon/urn:lsid:ipni.org:names:60476301-2"
#> [38] "http://legacy.tropicos.org/Name/100473089"
#> $common_names
#> # A tibble: 1 x 2
#> name language
#> <chr> <chr>
#> 1 Malus domestica vi
#> $classification
#> # A tibble: 0 x 0
#> $synonyms
#> list()
wt_wikipedia_search(query = "Pinus")
#> $batchcomplete
#> [1] ""
#> $continue
#> $continue$sroffset
#> [1] 10
#> $continue$continue
#> [1] "-||"
#> $query
#> $query$searchinfo
#> $query$searchinfo$totalhits
#> [1] 3374
#> $query$searchinfo$suggestion
#> [1] "penis"
#> $query$searchinfo$suggestionsnippet
#> [1] "penis"
#> $query$search
#> # A tibble: 10 x 7
#> ns title pageid size wordcount snippet timestamp
#> <int> <chr> <int> <int> <int> <chr> <chr>
#> 1 0 Pine 39389 36555 4058 "A pine is any conifer in… 2020-06-26…
#> 2 0 Pinus po… 532941 31087 3069 "misidentified it as <spa… 2020-06-19…
#> 3 0 Pinus co… 507717 20486 2343 "all pines (member specie… 2020-05-25…
#> 4 0 Pinus je… 463015 9130 1008 "long, with a large (15 t… 2019-12-22…
#> 5 0 Pinus st… 464301 31478 3815 "3 ft) tall & wide. M… 2020-06-22…
#> 6 0 Pinus re… 507802 7501 783 ""<span class=\"sear… 2020-05-08…
#> 7 0 Pinus lo… 649634 15408 1741 "sometimes form dense for… 2020-04-14…
#> 8 0 Pinus la… 459402 11464 1338 "Fire affected this speci… 2020-01-15…
#> 9 0 Pinus ni… 438963 11947 1421 "hypodermal cells. P. nig… 2020-04-03…
#> 10 0 Pinus mu… 438946 11964 901 "encyclopedia) is still r… 2020-06-17…
search supports languages
wt_wikipedia_search(query = "Pinus", wiki = "fr")
#> $batchcomplete
#> [1] ""
#> $continue
#> $continue$sroffset
#> [1] 10
#> $continue$continue
#> [1] "-||"
#> $query
#> $query$searchinfo
#> $query$searchinfo$totalhits
#> [1] 990
#> $query$search
#> # A tibble: 10 x 7
#> ns title pageid size wordcount snippet timestamp
#> <int> <chr> <int> <int> <int> <chr> <chr>
#> 1 0 Pin (pl… 89798 83647 9325 "<span class=\"searchmatc… 2020-05-23…
#> 2 0 Pinus p… 121544 31274 3892 "<span class=\"searchmatc… 2020-04-30…
#> 3 0 Pinus c… 98421 8237 959 "<span class=\"searchmatc… 2019-05-30…
#> 4 0 Pinus n… 950330 26623 3013 "recycler}}. <span class=… 2020-03-30…
#> 5 0 Pin syl… 121562 13725 1611 "<span class=\"searchmatc… 2020-03-22…
#> 6 0 Pinus h… 117280 22257 2671 "<span class=\"searchmatc… 2020-05-07…
#> 7 0 Pin par… 138378 8763 916 "<span class=\"searchmatc… 2020-04-26…
#> 8 0 Pinus s… 776950 11662 1628 "les articles homonymes, … 2020-04-14…
#> 9 0 Pinus m… 2480854 21747 2310 "<span class=\"searchmatc… 2019-02-25…
#> 10 0 Pinus u… 3208429 6316 720 "significations, voir Pin… 2020-03-09…
lower level
pg <- wt_wiki_page("https://commons.wikimedia.org/wiki/Abelmoschus")
res <- wt_wikicommons_parse(pg)
#> [[1]]
#> [[1]]$name
#> [1] "okra"
#> [[1]]$language
#> [1] "en"
#> [[2]]
#> [[2]]$name
#> [1] "مسكي"
#> [[2]]$language
#> [1] "ar"
#> [[3]]
#> [[3]]$name
#> [1] "Abelmoş"
#> [[3]]$language
#> [1] "az"
higher level
res <- wt_wikicommons("Abelmoschus")
#> # A tibble: 15 x 2
#> rank name
#> <chr> <chr>
#> 1 Domain "Eukaryota"
#> 2 unranked "Archaeplastida"
#> 3 Regnum "Plantae"
#> 4 Cladus "angiosperms"
#> 5 Cladus "eudicots"
#> 6 Cladus "core eudicots"
#> 7 Cladus "superrosids"
#> 8 Cladus "rosids"
#> 9 Cladus "eurosids II"
#> 10 Ordo "Malvales"
#> 11 Familia "Malvaceae"
#> 12 Subfamilia "Malvoideae"
#> 13 Tribus "Hibisceae"
#> 14 Genus "Abelmoschus"
#> 15 Authority " Medik. (1787)"
#> # A tibble: 19 x 2
#> name language
#> <chr> <chr>
#> 1 okra en
#> 2 مسكي ar
#> 3 Abelmoş az
#> 4 Bamja bs
#> 5 Ibiškovec cs
#> 6 Bisameibisch de
#> 7 Okrat fi
#> 8 Abelmosco gl
#> 9 Abelmošus hr
#> 10 Ybiškė lt
#> 11 അബെൽമോസ്കസ് ml
#> 12 Абельмош mrj
#> 13 Abelmoskusslekta nn
#> 14 Piżmian pl
#> 15 Абельмош ru
#> 16 Okrasläktet sv
#> 17 Абельмош udm
#> 18 Chi Vông vang vi
#> 19 黄葵属 zh
wt_wikicommons_search(query = "Pinus")
#> $batchcomplete
#> [1] ""
#> $continue
#> $continue$sroffset
#> [1] 10
#> $continue$continue
#> [1] "-||"
#> $query
#> $query$searchinfo
#> $query$searchinfo$totalhits
#> [1] 270
#> $query$search
#> # A tibble: 10 x 7
#> ns title pageid size wordcount snippet timestamp
#> <int> <chr> <int> <lgl> <int> <chr> <chr>
#> 1 0 Pinus sylvestris 9066 NA 0 "" 2020-05-13T19:…
#> 2 0 Pinus ponderosa 250435 NA 0 "" 2020-04-18T15:…
#> 3 0 Pinus nigra 64703 NA 0 "" 2018-03-06T10:…
#> 4 0 Pinus 82071 NA 0 "" 2017-05-28T10:…
#> 5 0 Pinus mugo 132442 NA 0 "" 2019-07-26T09:…
#> 6 0 Rogów Arboretum 10563490 NA 0 "" 2020-01-01T13:…
#> 7 0 Pinus contorta 186918 NA 0 "" 2020-01-19T19:…
#> 8 0 Anacortes Community F… 2989013 NA 0 "" 2014-12-10T15:…
#> 9 0 Pinus halepensis 172181 NA 0 "" 2018-05-05T10:…
#> 10 0 Pinus brutia 139389 NA 0 "" 2014-11-23T11:…
lower level
pg <- wt_wiki_page("https://species.wikimedia.org/wiki/Malus_domestica")
res <- wt_wikispecies_parse(pg, types = "common_names")
#> [[1]]
#> [[1]]$name
#> [1] "Ябълка"
#> [[1]]$language
#> [1] "български"
#> [[2]]
#> [[2]]$name
#> [1] "Poma, pomera"
#> [[2]]$language
#> [1] "català"
#> [[3]]
#> [[3]]$name
#> [1] "jabloň domácí"
#> [[3]]$language
#> [1] "čeština"
higher level
res <- wt_wikispecies("Malus domestica")
#> # A tibble: 8 x 2
#> rank name
#> <chr> <chr>
#> 1 Superregnum Eukaryota
#> 2 Regnum Plantae
#> 3 Cladus Angiosperms
#> 4 Cladus Eudicots
#> 5 Cladus Core eudicots
#> 6 Cladus Rosids
#> 7 Cladus Eurosids I
#> 8 Ordo Rosales
#> # A tibble: 22 x 2
#> name language
#> <chr> <chr>
#> 1 Ябълка български
#> 2 Poma, pomera català
#> 3 jabloň domácí čeština
#> 4 Apfel Deutsch
#> 5 Μηλιά Ελληνικά
#> 6 Apple English
#> 7 Manzano español
#> 8 Aed-õunapuu eesti
#> 9 Tarhaomenapuu suomi
#> 10 Aapel Nordfriisk
#> # … with 12 more rows
wt_wikispecies_search(query = "Pinus")
#> $batchcomplete
#> [1] ""
#> $continue
#> $continue$sroffset
#> [1] 10
#> $continue$continue
#> [1] "-||"
#> $query
#> $query$searchinfo
#> $query$searchinfo$totalhits
#> [1] 515
#> $query$search
#> # A tibble: 10 x 7
#> ns title pageid size wordcount snippet timestamp
#> <int> <chr> <int> <int> <int> <chr> <chr>
#> 1 0 Pinus 1.74e4 5737 784 "Familia: Pinaceae Ge… 2020-06-06…
#> 2 0 Pinus halepe… 4.51e4 4047 580 "Pinaceae Genus: <spa… 2019-12-20…
#> 3 0 Pinus pinea 4.51e4 1949 406 "Familia: Pinaceae Ge… 2019-10-19…
#> 4 0 Pinus veitch… 1.34e6 1450 181 "Familia: Pinaceae Ge… 2019-07-19…
#> 5 0 Pinus pumila 7.35e4 1395 189 "Pinaceae Genus: <spa… 2019-07-14…
#> 6 0 Pinus subg. … 3.01e5 358 27 "Pinaceae Genus: <spa… 2019-11-24…
#> 7 0 Pinus clausa 4.50e4 1552 208 "Pinaceae Genus: <spa… 2019-08-15…
#> 8 0 Pinus pseudo… 1.48e6 2114 310 "Genus: <span class=\… 2020-05-21…
#> 9 0 Pinus pinast… 1.32e6 2764 379 "Pinaceae Genus: <spa… 2019-12-20…
#> 10 0 Pinus nigra … 3.27e5 1799 138 "Genus: <span class=\… 2020-03-02…