Getting started with the robis package

First load the package:

library(robis)

Occurrences

The occurrence() function provides access to raw occurrence data. For example, to fetch all occurrences by scientific name:

occurrence("Abra aequalis")
#> # A tibble: 767 x 99
#>    country   date_year scientificNameID       year  scientificName superfamilyid
#>    <chr>         <int> <chr>                  <chr> <chr>                  <int>
#>  1 United S…      1976 urn:lsid:marinespecie… 1976  Abra aequalis          14636
#>  2 United S…      1977 urn:lsid:marinespecie… 1977  Abra aequalis          14636
#>  3 United S…      1979 urn:lsid:marinespecie… 1979  Abra aequalis          14636
#>  4 <NA>           1986 urn:lsid:marinespecie… 1986  Abra aequalis          14636
#>  5 <NA>           2013 urn:lsid:marinespecie… <NA>  Abra aequalis          14636
#>  6 <NA>           1994 urn:lsid:marinespecie… <NA>  Abra aequalis          14636
#>  7 <NA>           1972 urn:lsid:marinespecie… <NA>  Abra aequalis          14636
#>  8 <NA>           1996 urn:lsid:marinespecie… <NA>  Abra aequalis          14636
#>  9 <NA>           2013 urn:lsid:marinespecie… <NA>  Abra aequalis          14636
#> 10 <NA>           2001 urn:lsid:marinespecie… <NA>  Abra aequalis          14636
#> # … with 757 more rows, and 93 more variables: individualCount <chr>,
#> #   dropped <lgl>, aphiaID <int>, decimalLatitude <dbl>, subclassid <int>,
#> #   phylumid <int>, familyid <int>, basisOfRecord <chr>, subterclassid <int>,
#> #   maximumDepthInMeters <dbl>, id <chr>, day <chr>, order <chr>,
#> #   dataset_id <chr>, decimalLongitude <dbl>, collectionCode <chr>,
#> #   date_end <dbl>, speciesid <int>, superfamily <chr>, date_start <dbl>,
#> #   month <chr>, genus <chr>, bibliographicCitation <chr>, subterclass <chr>,
#> #   eventDate <chr>, superorder <chr>, coordinateUncertaintyInMeters <chr>,
#> #   absence <lgl>, superorderid <int>, genusid <int>,
#> #   originalScientificName <chr>, marine <lgl>, minimumDepthInMeters <dbl>,
#> #   infraclassid <int>, institutionCode <chr>, date_mid <dbl>,
#> #   infraclass <chr>, class <chr>, orderid <int>, waterBody <chr>,
#> #   kingdom <chr>, recordedBy <chr>, classid <int>, phylum <chr>,
#> #   species <chr>, subclass <chr>, family <chr>, kingdomid <int>,
#> #   node_id <chr>, flags <chr>, sss <dbl>, shoredistance <int>, sst <dbl>,
#> #   bathymetry <int>, dynamicProperties <chr>, associatedReferences <chr>,
#> #   fieldNumber <chr>, catalogNumber <chr>, locality <chr>,
#> #   stateProvince <chr>, scientificNameAuthorship <chr>, preparations <chr>,
#> #   depth <dbl>, identifiedBy <chr>, type <chr>, taxonRemarks <chr>,
#> #   occurrenceStatus <chr>, materialSampleID <chr>, occurrenceID <chr>,
#> #   ownerInstitutionCode <chr>, samplingProtocol <chr>, taxonRank <chr>,
#> #   datasetName <chr>, datasetID <chr>, collectionID <chr>, eventID <chr>,
#> #   habitat <chr>, associatedMedia <lgl>, associatedSequences <lgl>,
#> #   dateIdentified <chr>, verbatimDepth <chr>, occurrenceRemarks <chr>,
#> #   county <chr>, modified <lgl>, infraspecificEpithet <lgl>,
#> #   recordNumber <lgl>, higherGeography <chr>, continent <chr>,
#> #   typeStatus <lgl>, geodeticDatum <lgl>, specificEpithet <chr>,
#> #   georeferenceSources <lgl>, coordinatePrecision <chr>

Alternatively, occurrences can be fetched by AphiaID using the taxonid parameter:

occurrence(taxonid = 293683)
#> # A tibble: 767 x 99
#>    country   date_year scientificNameID       year  scientificName superfamilyid
#>    <chr>         <int> <chr>                  <chr> <chr>                  <int>
#>  1 United S…      1976 urn:lsid:marinespecie… 1976  Abra aequalis          14636
#>  2 United S…      1977 urn:lsid:marinespecie… 1977  Abra aequalis          14636
#>  3 United S…      1979 urn:lsid:marinespecie… 1979  Abra aequalis          14636
#>  4 <NA>           1986 urn:lsid:marinespecie… 1986  Abra aequalis          14636
#>  5 <NA>           2013 urn:lsid:marinespecie… <NA>  Abra aequalis          14636
#>  6 <NA>           1994 urn:lsid:marinespecie… <NA>  Abra aequalis          14636
#>  7 <NA>           1972 urn:lsid:marinespecie… <NA>  Abra aequalis          14636
#>  8 <NA>           1996 urn:lsid:marinespecie… <NA>  Abra aequalis          14636
#>  9 <NA>           2013 urn:lsid:marinespecie… <NA>  Abra aequalis          14636
#> 10 <NA>           2001 urn:lsid:marinespecie… <NA>  Abra aequalis          14636
#> # … with 757 more rows, and 93 more variables: individualCount <chr>,
#> #   dropped <lgl>, aphiaID <int>, decimalLatitude <dbl>, subclassid <int>,
#> #   phylumid <int>, familyid <int>, basisOfRecord <chr>, subterclassid <int>,
#> #   maximumDepthInMeters <dbl>, id <chr>, day <chr>, order <chr>,
#> #   dataset_id <chr>, decimalLongitude <dbl>, collectionCode <chr>,
#> #   date_end <dbl>, speciesid <int>, superfamily <chr>, date_start <dbl>,
#> #   month <chr>, genus <chr>, bibliographicCitation <chr>, subterclass <chr>,
#> #   eventDate <chr>, superorder <chr>, coordinateUncertaintyInMeters <chr>,
#> #   absence <lgl>, superorderid <int>, genusid <int>,
#> #   originalScientificName <chr>, marine <lgl>, minimumDepthInMeters <dbl>,
#> #   infraclassid <int>, institutionCode <chr>, date_mid <dbl>,
#> #   infraclass <chr>, class <chr>, orderid <int>, waterBody <chr>,
#> #   kingdom <chr>, recordedBy <chr>, classid <int>, phylum <chr>,
#> #   species <chr>, subclass <chr>, family <chr>, kingdomid <int>,
#> #   node_id <chr>, flags <chr>, sss <dbl>, shoredistance <int>, sst <dbl>,
#> #   bathymetry <int>, dynamicProperties <chr>, associatedReferences <chr>,
#> #   fieldNumber <chr>, catalogNumber <chr>, locality <chr>,
#> #   stateProvince <chr>, scientificNameAuthorship <chr>, preparations <chr>,
#> #   depth <dbl>, identifiedBy <chr>, type <chr>, taxonRemarks <chr>,
#> #   occurrenceStatus <chr>, materialSampleID <chr>, occurrenceID <chr>,
#> #   ownerInstitutionCode <chr>, samplingProtocol <chr>, taxonRank <chr>,
#> #   datasetName <chr>, datasetID <chr>, collectionID <chr>, eventID <chr>,
#> #   habitat <chr>, associatedMedia <lgl>, associatedSequences <lgl>,
#> #   dateIdentified <chr>, verbatimDepth <chr>, occurrenceRemarks <chr>,
#> #   county <chr>, modified <lgl>, infraspecificEpithet <lgl>,
#> #   recordNumber <lgl>, higherGeography <chr>, continent <chr>,
#> #   typeStatus <lgl>, geodeticDatum <lgl>, specificEpithet <chr>,
#> #   georeferenceSources <lgl>, coordinatePrecision <chr>

Other parameters include geometry, which accepts polygons in WKT format:

occurrence("Abra alba", geometry = "POLYGON ((2.59689 51.16772, 2.62436 51.14059, 2.76066 51.19225, 2.73216 51.20946, 2.59689 51.16772))")
#> # A tibble: 319 x 85
#>    date_year scientificNameID year  scientificName superfamilyid individualCount
#>        <int> <chr>            <chr> <chr>                  <int> <chr>          
#>  1      1998 urn:lsid:marine… 1998  Abra alba              14636 24.0           
#>  2      2000 urn:lsid:marine… 2000  Abra alba              14636 <NA>           
#>  3      1995 urn:lsid:marine… 1995  Abra alba              14636 127.0          
#>  4      1997 urn:lsid:marine… 1997  Abra alba              14636 10.0           
#>  5      1989 urn:lsid:marine… 1989  Abra alba              14636 20.0           
#>  6      1996 urn:lsid:marine… 1996  Abra alba              14636 1.0            
#>  7      2013 urn:lsid:marine… 2013  Abra alba              14636 <NA>           
#>  8      2007 urn:lsid:marine… 2007  Abra alba              14636 <NA>           
#>  9      2016 urn:lsid:marine… 2016  Abra alba              14636 <NA>           
#> 10      1981 urn:lsid:marine… 1981  Abra alba              14636 9.0            
#> # … with 309 more rows, and 79 more variables: dropped <lgl>,
#> #   fieldNumber <chr>, aphiaID <int>, decimalLatitude <dbl>, subclassid <int>,
#> #   phylumid <int>, familyid <int>, catalogNumber <chr>,
#> #   occurrenceStatus <chr>, basisOfRecord <chr>, subterclassid <int>,
#> #   modified <chr>, maximumDepthInMeters <dbl>, id <chr>, day <chr>,
#> #   order <chr>, dataset_id <chr>, locality <chr>, decimalLongitude <dbl>,
#> #   collectionCode <chr>, date_end <dbl>, speciesid <int>, occurrenceID <chr>,
#> #   superfamily <chr>, date_start <dbl>, month <chr>, genus <chr>,
#> #   samplingProtocol <chr>, subterclass <chr>, eventDate <chr>, eventID <chr>,
#> #   superorder <chr>, absence <lgl>, samplingEffort <chr>, superorderid <int>,
#> #   genusid <int>, originalScientificName <chr>, marine <lgl>,
#> #   minimumDepthInMeters <dbl>, infraclassid <int>, institutionCode <chr>,
#> #   date_mid <dbl>, infraclass <chr>, class <chr>, orderid <int>, sex <chr>,
#> #   geodeticDatum <chr>, kingdom <chr>, recordedBy <chr>, classid <int>,
#> #   phylum <chr>, lifeStage <chr>, species <chr>, subclass <chr>,
#> #   datasetID <chr>, family <chr>, kingdomid <int>, node_id <chr>, flags <chr>,
#> #   sss <dbl>, shoredistance <int>, sst <dbl>, bathymetry <dbl>,
#> #   language <chr>, footprintSRS <chr>, datasetName <chr>, waterBody <chr>,
#> #   depth <dbl>, country <chr>, references <chr>, dynamicProperties <chr>,
#> #   bibliographicCitation <chr>, continent <chr>,
#> #   scientificNameAuthorship <chr>, specificEpithet <chr>, verbatimDepth <chr>,
#> #   occurrenceRemarks <chr>, footprintWKT <chr>, locationID <chr>

A convenience function map_leaflet() is provided to visualize occurrences on an interactive map:

map_leaflet(occurrence("Abra sibogai"))

Checklists

The checklist() function returns all taxa observed for a given set of filters.

checklist("Semelidae")
#> # A tibble: 108 x 41
#>    scientificName   scientificNameAut… taxonID ncbi_id taxonRank taxonomicStatus
#>    <chr>            <chr>                <int>   <int> <chr>     <chr>          
#>  1 Abra alba        (W. Wood, 1802)     141433  399303 Species   accepted       
#>  2 Abra nitida      (O. F. Müller, 17…  141435  358434 Species   accepted       
#>  3 Scrobicularia p… (da Costa, 1778)    141424  665965 Species   accepted       
#>  4 Abra prismatica  (Montagu, 1808)     141436  183592 Species   accepted       
#>  5 Abra tenuis      (Montagu, 1803)     141439      NA Species   accepted       
#>  6 Abra             Lamarck, 1818       138474  121180 Genus     accepted       
#>  7 Abra segmentum   (Récluz, 1843)      141438      NA Species   accepted       
#>  8 Theora lubrica   Gould, 1861         233903 1230554 Species   accepted       
#>  9 Semelidae        Stoliczka, 1870 (…    1781  121179 Family    accepted       
#> 10 Abra aequalis    (Say, 1822)         293683 2175524 Species   accepted       
#> # … with 98 more rows, and 35 more variables: acceptedNameUsage <chr>,
#> #   acceptedNameUsageID <int>, is_marine <lgl>, is_brackish <lgl>,
#> #   kingdom <chr>, phylum <chr>, class <chr>, subclass <chr>, infraclass <chr>,
#> #   subterclass <chr>, superorder <chr>, order <chr>, superfamily <chr>,
#> #   family <chr>, kingdomid <int>, phylumid <int>, classid <int>,
#> #   subclassid <int>, infraclassid <int>, subterclassid <int>,
#> #   superorderid <int>, orderid <int>, superfamilyid <int>, familyid <int>,
#> #   records <int>, genus <chr>, genusid <int>, species <chr>, speciesid <int>,
#> #   bold_id <int>, is_freshwater <lgl>, is_terrestrial <lgl>, wrims <lgl>,
#> #   subspecies <chr>, subspeciesid <int>

Just like the occurrence() function, checklist() accepts WKT geometries:

checklist(geometry = "POLYGON ((2.59689 51.16772, 2.62436 51.14059, 2.76066 51.19225, 2.73216 51.20946, 2.59689 51.16772))")
#> # A tibble: 902 x 73
#>    scientificName      taxonID ncbi_id taxonomicStatus acceptedNameUsage  
#>    <chr>                 <int>   <int> <chr>           <chr>              
#>  1 Nematoda                799    6231 accepted        Nematoda           
#>  2 Abra alba            141433  399303 accepted        Abra alba          
#>  3 Sabatieria celtica   121360  319964 accepted        Sabatieria celtica 
#>  4 Sabatieria punctata  153130  320140 accepted        Sabatieria punctata
#>  5 Spiophanes bombyx    131187  696728 accepted        Spiophanes bombyx  
#>  6 Kurtiella bidentata  345281 1177057 accepted        Kurtiella bidentata
#>  7 Nephtys hombergii    130359   36121 accepted        Nephtys hombergii  
#>  8 Oligochaeta            2036      NA accepted        Oligochaeta        
#>  9 Cirratulidae            919   46590 accepted        Cirratulidae       
#> 10 Fabulina fabula      146907      NA accepted        Fabulina fabula    
#> # … with 892 more rows, and 68 more variables: acceptedNameUsageID <int>,
#> #   is_marine <lgl>, is_brackish <lgl>, is_freshwater <lgl>,
#> #   is_terrestrial <lgl>, records <int>, taxonRank <chr>, kingdom <chr>,
#> #   kingdomid <int>, phylum <chr>, phylumid <int>,
#> #   scientificNameAuthorship <chr>, class <chr>, classid <int>, subclass <chr>,
#> #   order <chr>, superfamily <chr>, family <chr>, subclassid <int>,
#> #   orderid <int>, superfamilyid <int>, familyid <int>, infraclass <chr>,
#> #   infraclassid <int>, subterclass <chr>, superorder <chr>,
#> #   subterclassid <int>, superorderid <int>, suborder <chr>, suborderid <int>,
#> #   subfamily <chr>, subfamilyid <int>, subphylum <chr>, subphylumid <int>,
#> #   superclass <chr>, superclassid <int>, subkingdom <chr>, infrakingdom <chr>,
#> #   subkingdomid <int>, infrakingdomid <int>, genus <chr>, genusid <int>,
#> #   infraphylum <chr>, infraphylumid <int>, hab <lgl>, bold_id <int>,
#> #   species <chr>, speciesid <int>, infraorder <chr>, parvorder <chr>,
#> #   infraorderid <int>, parvorderid <int>, tribe <chr>, tribeid <int>,
#> #   wrims <lgl>, subgenus <chr>, subgenusid <int>, category <chr>,
#> #   section <chr>, subsection <chr>, sectionid <int>, subsectionid <int>,
#> #   subspecies <chr>, subspeciesid <int>, variety <chr>, varietyid <int>,
#> #   forma <chr>, formaid <int>

Measurements and facts

The package also provides access to MeasurementOrFact records associated with occurrences. When calling occurrence(), MeasurementOrFact records can be included by setting mof = TRUE.

occ <- occurrence("Abra tenuis", mof = TRUE)

MeasurementOrFact records are nested within the occurrence records, but the measurements() function allows you to extract them into a flat data frame. Use the fields parameter to indicate which occurrence fields should be preserved in the measurements table.

mof <- measurements(occ, fields = c("scientificName", "decimalLongitude", "decimalLatitude"))
mof
#> # A tibble: 19,394 x 17
#>    scientificName decimalLongitude decimalLatitude measurementDeterminedBy
#>    <chr>                     <dbl>           <dbl> <chr>                  
#>  1 Abra tenuis               -1.19            46.3 <NA>                   
#>  2 Abra tenuis               -1.19            46.3 <NA>                   
#>  3 Abra tenuis               -1.19            46.3 <NA>                   
#>  4 Abra tenuis               -1.19            46.3 <NA>                   
#>  5 Abra tenuis               -1.19            46.3 <NA>                   
#>  6 Abra tenuis               -1.20            46.3 <NA>                   
#>  7 Abra tenuis               -1.20            46.3 <NA>                   
#>  8 Abra tenuis               -1.20            46.3 <NA>                   
#>  9 Abra tenuis               -1.20            46.3 <NA>                   
#> 10 Abra tenuis               -1.20            46.3 <NA>                   
#> # … with 19,384 more rows, and 13 more variables: measurementAccuracy <chr>,
#> #   measurementValue <chr>, measurementRemarks <chr>, measurementValueID <chr>,
#> #   level <int>, occurrenceID <chr>, measurementUnit <chr>,
#> #   measurementDeterminedDate <chr>, measurementType <chr>,
#> #   measurementUnitID <chr>, measurementTypeID <chr>, measurementID <chr>,
#> #   measurementMethod <chr>

Note that the MeasurementOrFact fields can be used as parameters to the occurrence() function. For example, to only get occurrences with associated biomass measurements:

library(dplyr)

occurrence("Abra tenuis", mof = TRUE, measurementtype = "biomass") %>%
  measurements()
#> # A tibble: 29 x 15
#>    id       measurementDeter… measurementAccu… measurementValue measurementRema…
#>    <chr>    <lgl>             <lgl>            <chr>            <lgl>           
#>  1 10582b4… NA                NA               0,04             NA              
#>  2 10582b4… NA                NA               mire with debri… NA              
#>  3 13f2c93… NA                NA               0,07             NA              
#>  4 13f2c93… NA                NA               mire             NA              
#>  5 1b72b8b… NA                NA               0,12             NA              
#>  6 1b72b8b… NA                NA               mire             NA              
#>  7 208286b… NA                NA               0,11             NA              
#>  8 208286b… NA                NA               fine sand        NA              
#>  9 2a98512… NA                NA               0,05             NA              
#> 10 2a98512… NA                NA               mire with debris NA              
#> # … with 19 more rows, and 10 more variables: measurementValueID <lgl>,
#> #   level <int>, occurrenceID <chr>, measurementUnit <chr>,
#> #   measurementDeterminedDate <lgl>, measurementType <chr>,
#> #   measurementUnitID <chr>, measurementTypeID <lgl>, measurementID <lgl>,
#> #   measurementMethod <lgl>