# INBO CODING CLUB 26 January 2021 # Connect to INBO databases in R Welcome! ## Share your code snippet If you want to share your code snippet, copy paste your snippet within a section of three backticks (```): As an **example**: ``` library(tidyverse) ``` (*you can copy paste this example and add your code further down*) ## Yellow sticky notes No yellow sticky notes online. Put your name + " | " and add a "*" each time you solve a challenge (see below). ## Participants Name | Challenges --- | --- Hans Van Calster | Emma Cartuyvels|** Patrik Oosterlynck|** An Leyssen |** Charlotte Van Driessche | *** Raïsa Carmen |*** joost vanoverbeke | * Frank Huysentruyt | *** Cécile Herr |** Amber Mertens | *** Els Lommelen | Jo Loos | * * Wim Mertens | *** Tom De Dobbelaer | Aaike De Wever | (*) Els De Bie |** Anja Leyman|** Thierry | Luc DB | Floris Vdh | * ## Challenge 1 Hans: ``` mycode < - ... ``` ## Challenge 2 Cécile: ``` # Watina #' a. How many area codes exist in watina database? locations <- get_locs(watina) locations %>% distinct(area_code) %>% count() #' b. select locations from watina database with #`area_codes` "KAL", "WES" or "ZAB" # and depth range between 2 and 4 meters # Florabank get_florabank_observations(florabank, "Erigeron", fixed = FALSE, collect = FALSE) # Inboveg get_inboveg_survey(inboveg, "ABS-LIM2011", collect = FALSE) ``` Emma: ``` #' 1. Using watina package and always in lazy mode: #' a. How many area codes exist in watina database? locations <- get_locs(watina) locations %>% distinct(area_code) %>% count() #' b. select locations from watina database with `area_codes` "KAL", "WES" or #' "ZAB" and depth range between 2 and 4 meters locations <- get_locs(watina, filterdepth_range = c(2,4), area_codes = c("KAL", "WES", "ZAB")) #' 2. Using inbodb and florabank database and still being lazy: #' get observations of taxon Erigeron from florabank db: erigeron <- get_florabank_observations(florabank, names = "Erigeron") #' 3. Using inbodb and inboveg connection and still being lazy: #' search information about survey "ABS-LIM2011": survey <- get_inboveg_survey(inboveg, "ABS-LIM2011") #' How to collect the data queried above? collect() ``` Wim: ``` #' 1. Using watina package and always in lazy mode: #' a. How many area codes exist in watina database? loc_n <- get_locs((watina)) %>% summarise(n_distinct(area_code)) #' b. select locations from watina database with `area_codes` "KAL", "WES" or "ZAB" and depth range between 2 and 4 meters locations1 <- get_locs(watina, filterdepth_range = c(2,4)) %>% filter(area_name %in% c("KAL", "WES", "ZAB")) #' 2. Using inbodb and florabank database and still being lazy: #' get observations of taxon Erigeron from florabank db: Erigeron <- get_florabank_observations(florabank, names = "Erigeron") #' 3. Using inbodb and inboveg connection and still being lazy: #' search information about survey "ABS-LIM2011": ABSLIM <- get_inboveg_survey(inboveg, "ABS-LIM2011") #' How to collect the data queried above? loc_n_df <- collect(loc_n) Erigeron_df <- collect(Erigeron) ABSLIM_df <- collect(ABSLIM) ``` An: ``` data1 <- get_locs(watina) %>% summarise(aantal = n()) selectie <- c("KAL", "WES", "ZAB") data2 <- get_locs(watina, area_codes = selectie, filterdepth_range = c(2,4)) data3 <- get_florabank_observations(connection = florabank, names = "Erigeron") data4 <- get_inboveg_survey(connection = inboveg, survey_name = "ABS-LIM2011") data1 <- collect(data1) data2 <- collect(data2) data3 <- collect(data3) data4 <- collect(data4) ``` Frank: ``` get_locs(watina) %>% group_by(area_code) %>% summarize() %>% collect() get_locs(watina) %>% filter(area_code %in% c("KAL","WES","ZAB")) %>% filter(filterdepth>2 & filterdepth<4) %>% collect() get_florabank_observations(florabank, names="Erigeron") ``` Thierry: ``` get_locs(watina) %>% filter( area_code %in% c("KAL", "WES", "ZAB"), 2 < filterdepth, filterdepth < 4 ) ``` ## INTERMEZZO ## Challenge 3 Thierry: ``` tbl(florabank, "tblTaxon") %>% head(10) tbl(florabank, "tblTaxon") %>% filter(NaamNederlands == "Slank snavelmos") %>% pull(NaamWetenschappelijk) "SELECT NaamWetenschappelijk FROM tblTaxon WHERE NaamNederlands LIKE 'Slank%'" %>% sql() %>% tbl(src = florabank) ``` Frank ``` tbl(src = florabank, "tblTaxon") %>% head(tbl,n=10L) tbl(src = florabank, "tblTaxon") %>% filter(NaamNederlands=="Slank snavelmos") %>% select(NaamWetenschappelijk) tbl(src = florabank, "tblTaxon") %>% filter(substr(NaamNederlands,1,5)=="Slank") %>% select(NaamWetenschappelijk) tbl(src = inboveg, "ivRecording") %>% filter(Latitude >50.9 & Latitude <51.1) %>% filter(Longitude >3.5 & Longitude <3.9) %>% group_by(LocationCode) %>% summarise(number = n_distinct(Id)) %>% arrange(desc(number)) colnames(tbl(src = inboveg, "ivRecording")) ``` Cécile (in SQL mood) ``` #' 1. Using florabank and its table tblTaxon: #' a. get the first 10 rows # SQL my_sql <- "SELECT TOP 10 * FROM tblTaxon" taxon_first_10 <- tbl(src = florabank, sql(my_sql)) #' b. get the scientific name (`NaamWetenschappelijk`) of Dutch name #' (`NaamNederlands`) Slank snavelmos # SQL slank_snavelmos_query <- "SELECT NaamWetenschappelijk FROM tblTaxon WHERE NaamNederlands = 'Slank snavelmos'" tbl(src = florabank, sql(slank_snavelmos_query)) #' c. get the scientific names (`NaamWetenschappelijk`) and Dutch names #' (`NaamNederlands`) of taxa with Dutch name starting with `Slank`. [Tip for SQL query](https://www.w3schools.com/SQL/sql_like.asp); [tip for tidyverse](https://github.com/tidyverse/dbplyr/issues/295) # SQL slank_query <- "SELECT NaamWetenschappelijk, NaamNederlands FROM tblTaxon WHERE NaamNederlands LIKE 'Slank%'" tbl(src = florabank, sql(slank_query)) #''3. How to get the column names of table `ivRecording` from `INBOVEG`? # SQL tbl(src = inboveg, sql("SELECT TOP 0 * FROM ivRecording")) # of dit: my_sql <- "SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = N'ivRecording'" tbl(src = inboveg, sql(my_sql)) ``` JoL (in SQL mode) ``` sqlQuery <- "SELECT TOP 10 * FROM dbo.tblTaxon " tbl(src = florabank, sql(sqlQuery)) #'opzoeken van namen van kollommen in tabellen' #''3. How to get the column names of table `ivRecording` from `INBOVEG`? # SQL sqlivQuery <- "SELECT t.Name as TableName, c.name as ColumnName FROM sys.tables t inner join sys.columns c ON c.object_id = t.object_id WHERE t.name = 'ivrecording'" tbl(src = inboveg, sql(sqlivQuery)) ``` Amber: ``` #' 1. Using florabank and its table tblTaxon: #' a. get the first 10 rows # SQL tbl(src = florabank, sql("SELECT TOP 10 * FROM tblTaxon")) #' b. get the scientific name (`NaamWetenschappelijk`) of Dutch name #' (`NaamNederlands`) Slank snavelmos # SQL tbl(src = florabank, sql("SELECT Naamwetenschappelijk FROM tblTaxon WHERE NaamNederlands = 'Slank snavelmos'")) #' c. get the scientific names (`NaamWetenschappelijk`) and Dutch names #' (`NaamNederlands`) of taxa with Dutch name starting with `Slank`. [Tip for SQL query](https://www.w3schools.com/SQL/sql_like.asp); [tip for tidyverse](https://github.com/tidyverse/dbplyr/issues/295) # SQL tbl(src = florabank, sql("SELECT Naamwetenschappelijk FROM tblTaxon WHERE NaamNederlands LIKE 'Slank%'")) ``` Floris ``` #' 1. a. get the first 10 rows tbl(florabank, "tblTaxon") %>% head(10) #' b. get the scientific name (`NaamWetenschappelijk`) of Dutch name #' (`NaamNederlands`) Slank snavelmos tbl(florabank, "tblTaxon") %>% filter(NaamNederlands == "Slank snavelmos") %>% select(NaamWetenschappelijk) #' c. get the scientific names (`NaamWetenschappelijk`) and Dutch names #' (`NaamNederlands`) of taxa with Dutch name starting with `Slank`. [Tip for SQL query](https://www.w3schools.com/SQL/sql_like.asp); [tip for tidyverse](https://github.com/tidyverse/dbplyr/issues/295) tbl(florabank, "tblTaxon") %>% filter(NaamNederlands %like% "Slank%") %>% select(NaamWetenschappelijk, NaamNederlands) #' 2. tbl(inboveg, "ivRecording") %>% filter(Latitude >= 50.9, Latitude <= 51.1, Longitude >= 3.5, Longitude <= 3.9) %>% count(LocationCode) %>% arrange(desc(n)) %>% head(10) #''3. How to get the column names of table `ivRecording` from `INBOVEG`? tbl(inboveg, "ivRecording") %>% colnames ``` Charlotte: ``` #select first 10 rows tbl(florabank, "tblTaxon") %>% head(10) #scientific name slank snavelmos tbl(florabank, "tblTaxon") %>% filter(NaamNederlands == "Slank snavelmos") %>% select(NaamWetenschappelijk) #wet naam van selectie tbl(florabank, "tblTaxon") %>% filter(NaamNederlands %like% "Slank%") %>% select(NaamWetenschappelijk, NaamNederlands) #inboveg assignment tbl(src = inboveg, "ivRecording") %>% filter(Latitude >50.9 & Latitude <51.1) %>% filter(Longitude >3.5 & Longitude <3.9) %>% count(LocationCode) %>% arrange(desc(n)) %>% head(10) #how to get column names inboveg colnames(tbl(inboveg, "ivRecording")) ``` Raïsa ``` #' 2. Using INBOVEG database and its table ivRecording: a. retrieve the 10 #' locations (LocationCode) with the highest number of recordings with #' `Latitude` between 50.9 and 51.1 and `Longitude` between 3.9 and 3.5 ` tbl(src = inboveg, "ivRecording") %>% filter(Latitude >= 50.9 & Latitude<=51.1 & Longitude>=3.5 & Longitude<=3.9) %>% count(LocationCode) %>% arrange(desc(n)) %>% head(10) #''3. How to get the column names of table `ivRecording` from `INBOVEG`? tbl(inboveg, "ivRecording") %>% colnames ``` Amber: ``` #' 2. Using INBOVEG database and its table ivRecording: a. retrieve the 10 #' locations (LocationCode) with the highest number of recordings with #' `Latitude` between 50.9 and 51.1 and `Longitude` between 3.9 and 3.5 # SQL query <- " SELECT TOP 10 LocationCode, COUNT(Id) As TotalLocations FROM ivRecording WHERE (Latitude >= 50.9 AND Latitude <= 51.1) AND (Longitude >= 3.5 AND Longitude <= 3.9) GROUP BY LocationCode ORDER BY Count(Id) DESC " tbl(src = inboveg, sql(query)) ``` ## Bonus challenge