# INBO CODING CLUB

25 May 2021

Welcome!

## Share your code snippet

If you want to share your code snippet, copy paste your snippet within a section of three backticks (```):

As an **example**:

```
library(tidyverse)
```

(*you can copy paste this example and add your code further down*)

## Yellow sticky notes

No yellow sticky notes online. Put your name + " | " and add a "*" each time you solve a challenge (see below).

## Participants

Name | Challenges
--- | ---
Damiano Oldoni |
Anja Leyman |***
Lynn Pallemaerts |***
Dirk Maes | **
Hans Van Calster | ***
Emma Cartuyvels |***
Patrik Oosterlynck |**
An Leyssen |**
Raïsa Carmen |***
Adriaan Seynaeve |
Pieterjan Verhelst | *
Jasmijn Hillaert | *
Amber Mertens |***
Jeroen Vanden Borre | **
Lucia Manzanares |
Matthieu Chastel |
Joost Vanoverbeke | ***
Els De Bie |***
Frank Huysentruyt | **
Suzanna Lettens |

## Challenge 1

Emma:

```
tabel1 <- left_join(coding_club_core_team, bmk_team, by = "name")
tabel2 <- right_join(bmk_team, coding_club_core_team, by = "name")
tabel3 <- inner_join(coding_club_core_team, bmk_team, by = "name")
tabel4 <- inner_join(bmk_team, coding_club_core_team, by = "name")
tabel5 <- full_join(coding_club_core_team, bmk_team, by = "name")
```

Raïsa:

```
coding_club_core_team %>% left_join(bmk_team, by = "name")
bmk_team %>% right_join(coding_club_core_team, by = "name")
coding_club_core_team %>% inner_join(bmk_team, by = "name")
# Invert order of the columns
bmk_team %>% inner_join(coding_club_core_team, by = "name")
coding_club_core_team %>% full_join(bmk_team, by = "name")
```

Joost:

```
# also works without an explicit 'by = "name"'
coding_club_core_team_orcid <- coding_club_core_team %>% left_join(bmk_team)
coding_club_core_team_orcid <- bmk_team %>% right_join(coding_club_core_team)
coding_club_core_team_orcid <- bmk_team %>% inner_join(coding_club_core_team)
coding_club_core_team_orcid <- coding_club_core_team %>% inner_join(bmk_team)
coding_club_core_team_orcid <- coding_club_core_team %>%
full_join(bmk_team)
```

## INTERMEZZO

In part 5 of the 1st challenge, we add not only columns (_variables_) but also rows (_cases_). Why can't we do this with functions that combine rows?

Try this?

```
union_all(coding_club_core_team, bmk_team)
```

or this?

```
union(coding_club_core_team, bmk_team)
```

or this?

```
bind_rows(coding_club_core_team, bmk_team)
```

## Challenge 2

Anja:

```
# 1) Distributions from Belgium, Netherlands and Luxembourg should not overlap.
# How can you check it? There are several ways to do it, but try to use a data
# joining technique, which is likely the shortest and most readable way
str(distribution_BE) # country_code
distr_all <- distribution_BE %>%
  full_join(distribution_LU) %>%
  full_join(distribution_NL)
nrow(distr_all) - nrow(distribution_BE) - nrow(distribution_LU) - nrow(distribution_NL)

# 2) Merge the three distribution_* dataframes into a data.frame called distribution
distribution <- rbind(distribution_BE, distribution_LU, distribution_NL)
nrow(distribution)

# 3) Check that all taxa in vernacularname point to taxa in taxon
vernacularname %>%
  left_join(taxon, by = c("taxon_id" = "taxonID")) %>%
  filter(is.na(scientificName)) %>%
  nrow()

# 4) Which taxa in taxon do not have vernacular names?
check <- taxon %>%
  left_join(vernacularname, by = c("taxonID" = "taxon_id")) %>%
  filter(is.na(vernacularName))
check$scientificName
```

Dirk:

```
distribution <- rbind(distribution_BE, distribution_LU, distribution_NL)
n1 <- nrow(distribution)
distribution <- distribution %>% distinct()
n2 <- nrow(distribution)
n1 - n2
```

Emma:

```
intersect(distribution_BE, distribution_NL, distribution_LU) # this ignores one of the tables?
intersect(distribution_BE, distribution_NL)
intersect(distribution_BE, distribution_LU)
intersect(distribution_LU, distribution_NL)
distribution <- union(distribution_BE, distribution_NL) %>%
  union(distribution_LU)
vernacularname %>% anti_join(taxon, by = c("taxon_id" = "taxonID"))
taxon %>% anti_join(vernacularname, by = c("taxonID" = "taxon_id"))
```

## Challenge 3

Anja:

```
# 1)
extended_taxon <- taxon %>%
  full_join(vernacularname,
            by = c("taxonID" = "taxon_id"),
            suffix = c("_taxon", "_vernacular"))
names(extended_taxon)

# 2) Add distribution to extended_taxon and retain only the taxa found in
# Belgium, Netherlands or Luxembourg, i.e. only taxa in distribution
distr_taxon <- extended_taxon %>%
  right_join(distribution, by = c("taxonID" = "taxon_id"))

# 3)
species_albania <- distribution_AL %>% distinct(taxon_id)
species_europe <- distribution %>% distinct(taxon_id)
albania <- setdiff(species_albania, species_europe, by = c("taxon_id"))
europe <- setdiff(species_europe, species_albania, by = c("taxon_id"))
head(distribution_AL)
head(distribution)

# 4) Which species are found both in Albania (taxon_AL) and in Western Europe?
alb_eur <- intersect(species_albania, species_europe, by = c("taxon_id"))

# 5) Help him to merge taxon_AL, distribution_AL and vernacularname_AL to
# taxon, distribution and vernacularname respectively
taxon2 <- union(taxon, taxon_AL)
distribution2 <- union(distribution, distribution_AL)
vernacularname2 <- union(vernacularname, vernacularname_AL)
```

Emma:

```
#1,2
extended_taxon <- taxon %>%
  left_join(vernacularname,
            by = c("taxonID" = "taxon_id"),
            suffix = c("_taxon", "_vernacular")) %>%
  right_join(distribution,
             by = c("taxonID" = "taxon_id"),
             suffix = c("_taxon", "_distribution"))
#3
distribution_AL %>% anti_join(extended_taxon, by = c("taxon_id" = "taxonID"))
extended_taxon %>% anti_join(distribution_AL, by = c("taxonID" = "taxon_id"))
#4
extended_taxon %>% inner_join(distribution_AL, by = c("taxonID" = "taxon_id"))
#5
union(taxon, taxon_AL)
union(distribution, distribution_AL)
union(vernacularname, vernacularname_AL)
```

## Nice to know

How can you get somebody's ORCID iD directly from R? Dirk found this:

```
library(rorcid)
hvc <- orcid(query = "hans van calster")
hvc
```