# INBO CODING CLUB 30 June 2022 Welcome! ## Share your code snippet If you want to share your code snippet, copy paste your snippet within a section of three backticks (```): As an **example**: ``` library(tidyverse) ``` (*you can copy paste this example and add your code further down*) ## Yellow sticky notes No yellow sticky notes online. Put your name + " | " and add a "*" each time you solve a challenge (see below). ## Participants Name | Challenges --- | --- Damiano Oldoni | Dirk Maes | An Leyssen |* Hans Van Calster | Emma Cartuyvels|** Lynn Pallemaerts | *** Teun Everts | Sarah Broos| Lucia Manzanares| Matthieu | ## Challenge 1 Emma: ``` get_obs_2010 <- function(species){ ## set scientific name to lowercase species <- tolower(species) ## replace spaces with underscores species <- str_replace_all( species, pattern = " ", replacement = "_" ) ## compose filename file_name <- paste0("20220630_", species, "_2010", ".txt") ## read file read_tsv(paste0("./data/20220630/", file_name)) } ha_2010 <- get_obs_2010("Harmonia axyridis") get_obs <- function(species, year){ ## set scientific name to lowercase species <- tolower(species) ## replace spaces with underscores species <- str_replace_all( species, pattern = " ", replacement = "_" ) ## compose filename file_name <- paste0("20220630_", species, "_", year, ".txt") ## read file read_tsv(paste0("./data/20220630/", file_name)) } ha_2011 <- get_obs("Harmonia axyridis", 2011) ``` Lynn: ``` get_obs <- function(species, year) { # standardize species name sp <- species sp <- tolower(sp) sp <- str_replace_all(sp, pattern = " ", replacement = "_") # compose filename file_name <- paste0("20220630_", sp, "_", year, ".txt") # read file df <- read_tsv(paste0("./data/20220630/", file_name)) return(df) } ``` Sarah: ``` get_obs_2010<-function(species){ ## set scientific name to lowercase species <- tolower(species) ## replace spaces with underscores species <- str_replace_all( species, pattern = " ", replacement = "_") ## compose filename file_name <- paste0("20220630_", species, "_2010", ".txt") ## read file ha_2010 <- read_tsv(paste0("./data/20220630/", file_name)) return (ha_2010) } get_obs_2010("Harmonia axyridis") get_obs_2010("Harmonia axyridis") get_obs<-function(species, year){ ## set scientific name to lowercase species <- tolower(species) ## replace spaces with underscores species <- str_replace_all( species, pattern = " ", replacement = "_") ## compose filename file_name <- paste0("20220630_", species, "_", year, ".txt") ## read file ha_year <- read_tsv(paste0("./data/20220630/", file_name)) return (ha_year) } get_obs("Harmonia axyridis",2012) ``` ## Challenge 2 Lynn: ``` clean_data <- function(df, max_coord_uncertain = 1000, issues_to_discard = c("ZERO_COORDINATE", "COORDINATE_OUT_OF_RANGE", "COORDINATE_INVALID", "COUNTRY_COORDINATE_MISMATCH"), occurrenceStatus_to_discard = c("absent", "excluded")) { df <- df %>% filter(coordinateUncertaintyInMeters < max_coord_uncertain | is.na(coordinateUncertaintyInMeters)) %>% filter(!issue %in% issues_to_discard) %>% filter(!occurrenceStatus %in% occurrenceStatus_to_discard) return(df) } assign_eea_cell <- function(df, longitude_colname, latitude_colname) { # reprojection df <- df %>% mutate(x = !!sym(longitude_colname), y = !!sym(latitude_colname)) df <- st_as_sf(df, coords = c("x", "y"), crs = 4326) df <- st_transform(df, crs = 3035) coords <- st_coordinates(df) %>% as_tibble() %>% rename(x_3035 = X, y_3035 = Y) df <- df %>% bind_cols(coords) # get the EAA grid cell code df <- df %>% mutate(eea_cell_code = paste0("1km", "E", floor(x_3035/1000), floor(y_3035/1000))) %>% select(-c(x_3035, y_3035)) return(df) } get_n_obs_per_cell <- function(df) { # calculate number of observations in each grid cell res <- df %>% as_tibble() %>% group_by(eea_cell_code) %>% count() return(res) } join_to_grid <- function(df) { # read EEA grid be_grid <- st_read("./data/20220630/20220630_eea_1x1km_grid_BE.gpkg") res <- be_grid %>% inner_join(df, by = c("CELLCODE" = "eea_cell_code")) return(res) } ``` Hans (challenge 1 + 2): - Gebruik van require() om packages in functie te laden - assertthat om input variabelen te checken ``` #' Get the observation for a species in a given year #' #' @param species character string #' @param year numeric year #' #' @return a dataframe #' @export #' #' @examples #' get_obs(species = "Harmonia axyridis", year = 2010) get_obs <- function( species, year ) { require(readr) require(assertthat) require(stringr) assert_that(is.character(species)) assert_that(is.numeric(year)) ## set scientific name to lowercase species <- tolower(species) ## replace spaces with underscores species <- str_replace_all( species, pattern = " ", replacement = "_" ) ## compose filename file_name <- paste0("20220630_", species, "_", year, ".txt") ## read file species_data <- read_tsv(paste0("./data/20220630/", file_name)) return(species_data) } #' Clean the data #' #' @param df a data.frame containing columns ... #' @param coord_max_uncertainty max coordinate uncertainty in meters #' @param remove_issues a character vector with valid issue names to be removed #' @param remove_status a character vector with valid status names to be removed #' #' @return #' @export #' #' @examples clean_data <- function( df, coord_max_uncertainty = 1000, remove_issues = c( "ZERO_COORDINATE", "COORDINATE_OUT_OF_RANGE", "COORDINATE_INVALID", "COUNTRY_COORDINATE_MISMATCH" ), remove_status = c( "absent", "excluded" )) { require(dplyr) require(assertthat) assert_that(is.data.frame(df)) assert_that(is.character(remove_issues)) assert_that(is.character(remove_status)) # Remove data with coordinate uncertainty higher than 1000 meters df_cleaned <- df %>% filter(coordinateUncertaintyInMeters < coord_max_uncertainty | is.na(coordinateUncertaintyInMeters)) %>% ## Remove data with some issues filter(!issue %in% remove_issues) %>% ## Remove obs linked to absences or exclusions filter(!occurrenceStatus %in% remove_status) return(df_cleaned) } #' Add spatial information EEA #' #' @param df #' @param longitude_colname character, name of the column with longitude values #' @param latitude_colname character, name of the column with latitude values #' #' @return #' @export #' #' @examples assign_eea_cell <- function( df, longitude_colname, latitude_colname ) { require(dplyr) require(sf) require(assertthat) assert_that(is.data.frame(df)) assert_that(is.string(longitude_colname)) assert_that(is.string(latitude_colname)) ## Step 3a: reprojection df_coords <- df %>% mutate(x = !!sym(longitude_colname), y = !!sym(latitude_colname)) %>% st_as_sf(coords = c("x", "y"), crs = 4326) %>% st_transform(crs = 3035) %>% st_coordinates() %>% as_tibble() %>% rename(x_3035 = X, y_3035 = Y) %>% bind_cols(df, .) %>% ## Step 3b: get the EAA grid cell code each observation belongs to and add it to ## the data.frame mutate(eea_cell_code = paste0( "1km", "E", floor(x_3035/1000), "N", floor(y_3035/1000))) %>% select(-c(x_3035, y_3035)) return(df_coords) } #' Calculate number of observations per grid cell #' #' @param df a data.frame #' #' @return #' @export #' #' @examples get_n_obs_per_cell <- function(df) { require(dplyr) require(assertthat) assert_that(is.data.frame(df)) df_count <- df %>% as_tibble() %>% group_by(eea_cell_code) %>% count() return(df_count) } #' Add number of observation to a sf object of the EEA grid #' #' @param df a data.frame #' #' @return #' @export #' #' @examples add_n_obs_to_eea_grid <- function(df) { require(sf) require(dplyr) require(tidyr) ## Read EEA grid be_grid <- read_sf("./data/20220630/20220630_eea_1x1km_grid_BE.gpkg") ## add number of observations via inner_join (cells with no obs automatically ## excluded) be_grid_n_obs <- be_grid %>% inner_join(df, by = c("CELLCODE" = "eea_cell_code") ) return(be_grid_n_obs) } ``` ## Challenge 3 Lynn: ``` visualize_obs_cells <- function(df, species = "Harmonia axyridis", year = 2010, palette = colorBin(palette = "YlOrRd", domain = df$n), fill_op = 0.7) { labels <- paste0("EEA cell: ", df$CELLCODE, "<br/>Species: ", species, "<br/>Number of observations: ", df$n) %>% lapply(HTML) leaflet(st_transform(df, crs = 4326)) %>% addTiles() %>% addPolygons(fillColor = ~palette(n), weight = 1, opacity = 1, color = "purple", fillOpacity = fill_op, highlightOptions = highlightOptions(weight = 2, color = "black", fillOpacity = 1, bringToFront = TRUE), label = labels, labelOptions = labelOptions(style = list("font-weight" = "normal", padding = "3px 8px"), textsize = "15px", direction = "auto")) %>% addLegend(title = paste0(species, " (", year, ")<br>Number of observations"), pal = palette, values = ~n) } ``` # Challenge 1, 2 & 3 Hans Functions: ``` #' Get the observation for a species in a given year #' #' @param species character string #' @param year numeric year #' #' @return a dataframe #' @export #' #' @examples #' get_obs(species = "Harmonia axyridis", year = 2010) get_obs <- function( species, year ) { require(readr) require(assertthat) require(stringr) assert_that(is.character(species)) assert_that(is.numeric(year)) ## set scientific name to lowercase species <- tolower(species) ## replace spaces with underscores species <- str_replace_all( species, pattern = " ", replacement = "_" ) ## compose filename file_name <- paste0("20220630_", species, "_", year, ".txt") ## read file species_data <- read_tsv(paste0("./data/20220630/", file_name)) return(species_data) } #' Clean the data #' #' @param df a data.frame containing columns ... #' @param coord_max_uncertainty max coordinate uncertainty in meters #' @param remove_issues a character vector with valid issue names to be removed #' @param remove_status a character vector with valid status names to be removed #' #' @return #' @export #' #' @examples clean_data <- function( df, coord_max_uncertainty = 1000, remove_issues = c( "ZERO_COORDINATE", "COORDINATE_OUT_OF_RANGE", "COORDINATE_INVALID", "COUNTRY_COORDINATE_MISMATCH" ), remove_status = c( "absent", "excluded" )) { require(dplyr) require(assertthat) assert_that(is.data.frame(df)) assert_that(is.character(remove_issues)) assert_that(is.character(remove_status)) # Remove data with coordinate uncertainty higher than 1000 meters df_cleaned <- df %>% filter(coordinateUncertaintyInMeters < coord_max_uncertainty | is.na(coordinateUncertaintyInMeters)) %>% ## Remove data with some issues filter(!issue %in% remove_issues) %>% ## Remove obs linked to absences or exclusions filter(!occurrenceStatus %in% remove_status) return(df_cleaned) } #' Add spatial information EEA #' #' @param df #' @param longitude_colname character, name of the column with longitude values #' @param latitude_colname character, name of the column with latitude values #' #' @return #' @export #' #' @examples assign_eea_cell <- function( df, longitude_colname, latitude_colname ) { require(dplyr) require(sf) require(assertthat) assert_that(is.data.frame(df)) assert_that(is.string(longitude_colname)) assert_that(is.string(latitude_colname)) ## Step 3a: reprojection df_coords <- df %>% mutate(x = !!sym(longitude_colname), y = !!sym(latitude_colname)) %>% st_as_sf(coords = c("x", "y"), crs = 4326) %>% st_transform(crs = 3035) %>% st_coordinates() %>% as_tibble() %>% rename(x_3035 = X, y_3035 = Y) %>% bind_cols(df, .) %>% ## Step 3b: get the EAA grid cell code each observation belongs to and add it to ## the data.frame mutate(eea_cell_code = paste0( "1km", "E", floor(x_3035/1000), "N", floor(y_3035/1000))) %>% select(-c(x_3035, y_3035)) return(df_coords) } #' Calculate number of observations per grid cell #' #' @param df a data.frame #' #' @return #' @export #' #' @examples get_n_obs_per_cell <- function(df) { require(dplyr) require(assertthat) assert_that(is.data.frame(df)) df_count <- df %>% as_tibble() %>% group_by(eea_cell_code, species, year) %>% count() return(df_count) } #' Add number of observation to a sf object of the EEA grid #' #' @param df a data.frame #' @param be_grid Belgium part of the EEA grid #' #' @return #' @export #' #' @examples add_n_obs_to_eea_grid <- function(df, be_grid) { require(sf) require(dplyr) require(tidyr) ## add number of observations via inner_join (cells with no obs automatically ## excluded) be_grid_n_obs <- be_grid %>% inner_join(df, by = c("CELLCODE" = "eea_cell_code") ) return(be_grid_n_obs) } #' Plot a leaflet map of number of observations of a species on the EEA grid #' #' @param sf_df #' #' @return #' @export #' #' @examples visualize_obs_cells <- function(sf_df) { require(leaflet) require(sf) require(dplyr) require(htmltools) pal <- colorBin(palette = "YlOrRd", domain = sf_df$n) labels <- sprintf( "EEA cell: %s<br/>Species: %s<br/>Number of observations: %g", sf_df$CELLCODE, sf_df$species, sf_df$n) %>% lapply(HTML) plot <- leaflet(st_transform(sf_df, crs = 4326)) %>% addTiles() %>% addPolygons( fillColor = ~pal(n), weight = 1, opacity = 1, color = "purple", fillOpacity = 0.7, highlightOptions = highlightOptions(weight = 2, color = "black", fillOpacity = 1, bringToFront = TRUE), label = labels, labelOptions = labelOptions( style = list("font-weight" = "normal", padding = "3px 8px"), textsize = "15px", direction = "auto")) %>% addLegend(title = paste0(sf_df$species[1], " (", sf_df$year[1], ")<br>Number of observations"), pal = pal, values = ~n) return(plot) } ``` Hans workflow: ``` source("./src/20220630/20220630_functions.R") ha_2010 <- get_obs(species = "Harmonia axyridis", year = 2010) ha_2011 <- get_obs(species = "Harmonia axyridis", year = 2011) cb_2010 <- get_obs(species = "Chorthippus biguttulus", year = 2010) eea_be_grid <- read_sf("./data/20220630/20220630_eea_1x1km_grid_BE.gpkg") ha_2010 %>% clean_data() %>% assign_eea_cell(longitude_colname = "decimalLongitude", latitude_colname = "decimalLatitude")%>% get_n_obs_per_cell() %>% add_n_obs_to_eea_grid(be_grid = eea_be_grid) %>% visualize_obs_cells() ```