# INBO CODING CLUB 24 June 2021 Welcome! ## Share your code snippet If you want to share your code snippet, copy paste your snippet within a section of three backticks (```): As an **example**: ``` library(tidyverse) ``` (*you can copy paste this example and add your code further down*) ## Yellow sticky notes No yellow sticky notes online. Put your name + " | " and add a "*" each time you solve a challenge (see below). ## Participants Name | Challenges --- | --- Damiano Oldoni | * Raïsa Carmen | ** Lynn Pallemaerts |*** Emma Cartuyvels|** Hans Van Calster | *** Yasmine Verzelen |* Adriaan Seynaeve| Cécile Herr |** Matthieu Chastel |* Jasmijn Hillaert | Amber Mertens |* ## Challenge 1 Emma: ``` read_obs <- function(species, year) { ## set scientific name to lowercase species <- tolower(species) ## replace spaces with underscores species <- str_replace_all( species, pattern = " ", replacement = "_" ) ## compose filename file_name <- paste0("20210624_", species, "_", year, ".txt") ## read file read_tsv(paste0("./data/20210624/", file_name)) } ``` Cécile: ``` get_obs <- function(myspecies, datereceived, myyear){ # format datereceived: character "yyyymmdd" # (or maybe better to use prefix as a more general rule) ## set scientific name to lowercase species <- tolower(myspecies) ## replace spaces with underscores species <- str_replace_all( species, pattern = " ", replacement = "_" ) ## compose filename file_name <- paste0(datereceived, "_", species, "_", myyear ,".txt") ## read file # probably I don't need to replace the date here in the path since I chose to work there sp_year <- read_tsv(paste0("./data/20210624/", file_name)) } ``` Hans: ``` get_obs <- function(species, year, prefix) { ## set scientific name to lowercase species <- tolower(species) ## replace spaces with underscores species <- stringr::str_replace_all( species, pattern = " ", replacement = "_" ) ## compose filename file_name <- paste0(prefix, "_", species, "_", year, ".txt") ## read file observations <- readr::read_tsv(paste0("./data/20210624/", file_name)) return(observations) } ``` ## Challenge 2 Emma: ``` #' STEP 2: data cleaning, remove imprecise or unverified observations, absences #' Remove data with coordinate uncertainty higher than 1000 meters clean_data <- function(df, max_coord_uncertain = 1000, issues_to_discard = c( "ZERO_COORDINATE", "COORDINATE_OUT_OF_RANGE", "COORDINATE_INVALID", "COUNTRY_COORDINATE_MISMATCH" ), occurrenceStatus_to_discard = c( "absent", "excluded" )) { df <- df %>% filter(coordinateUncertaintyInMeters < max_coord_uncertain | is.na(coordinateUncertaintyInMeters)) #' Remove data with some issues df <- df %>% filter(!issue %in% issues_to_discard) #' Remove obs linked to absences or exclusions df %>% filter(!occurrenceStatus %in% occurrenceStatus_to_discard) } ha_2010 <- clean_data(ha_2010) #' STEP 3: get the 1x1km EAA grid cellcode each observation belongs to and add #' it to the observation data.frame #' Step 3a: reprojection assign_eea_cell <- function(df, longitude_colname, latitude_colname) { df <- df %>% mutate(x = !!sym(longitude_colname), y = !!sym(latitude_colname)) df <- st_as_sf(df, coords = c("x", "y"), crs = 4326) df <- st_transform(df, crs = 3035) coords <- st_coordinates(df) %>% as_tibble() %>% rename(x_3035 = X, y_3035= Y) df <- df %>% bind_cols(coords) ## Step 3b: get the EAA grid cell code each observation belongs to and add it to ## the data.frame df %>% mutate(eea_cell_code = paste0( "1km", "E", floor(x_3035/1000), "N", floor(y_3035/1000))) %>% select(-c(x_3035, y_3035)) } ha_2010 <- assign_eea_cell(ha_2010, "decimalLongitude", "decimalLatitude") ## Step 4: calculate number of observations of Harmonia axyridis in each grid cell get_n_obs_per_cell <- function(df) { df %>% as_tibble() %>% group_by(eea_cell_code) %>% count() } n_obs_ha_2010 <- get_n_obs_per_cell(ha_2010) #' STEP 5: read the EEA grid and get number of observations for each cell. Cells #' without observations are removed # Read EEA grid be_grid <- st_read("./data/20210624/20210624_eea_1x1km_grid_BE.gpkg") ## add number of observations via inner_join (cells with no obs automatically ## excluded) obs_to_grid <- function(grid, n_obs) { grid %>% inner_join(n_obs, by = c("CELLCODE" = "eea_cell_code") ) } be_grid_n_obs_ha_2010 <- obs_to_grid(be_grid, n_obs_ha_2010) ``` Hans: ``` clean_data <- function( df, max_coord_uncertain, issues_to_discard = c( "ZERO_COORDINATE", "COORDINATE_OUT_OF_RANGE", "COORDINATE_INVALID", "COUNTRY_COORDINATE_MISMATCH" ), occurrenceStatus_to_discard = c( "absent", "excluded" )) { #' STEP 2: data cleaning, remove imprecise or unverified observations, absences #' Remove data with coordinate uncertainty higher than 1000 meters df <- df %>% filter(coordinateUncertaintyInMeters < max_coord_uncertain | is.na(coordinateUncertaintyInMeters)) %>% #' Remove data with some issues filter(!issue %in% issues_to_discard) %>% #' Remove obs linked to absences or exclusions filter(!occurrenceStatus %in% occurrenceStatus_to_discard) return(df) } assign_eea_cell <- function( df, longitude_colname, latitude_colname) { #' STEP 3: get the 1x1km EAA grid cellcode each observation belongs to and add #' it to the observation data.frame #' Step 3a: reprojection df <- df %>% mutate(x = !!sym(longitude_colname), y = !!sym(latitude_colname)) df <- sf::st_as_sf(df, coords = c("x", "y"), crs = 4326) df <- sf::st_transform(df, crs = 3035) coords <- sf::st_coordinates(df) %>% as_tibble() %>% rename(x_3035 = X, y_3035 = Y) df <- df %>% bind_cols(coords) ## Step 3b: get the EAA grid cell code each observation belongs to and add it to ## the data.frame df <- df %>% mutate(eea_cell_code = paste0( "1km", "E", floor(x_3035/1000), "N", floor(y_3035/1000))) %>% select(-c(x_3035, y_3035)) return(df) } get_n_obs_per_cell <- function(df) { ## Step 4: calculate number of observations of Harmonia axyridis in each grid cell df %>% as_tibble() %>% group_by(eea_cell_code) %>% count() } be_grid_n_obs <- function(df_n_obs) { #' STEP 5: read the EEA grid and get number of observations for each cell. Cells #' without observations are removed # Read EEA grid be_grid <- sf::read_sf(here::here("./data/20210624/20210624_eea_1x1km_grid_BE.gpkg")) ## add number of observations via inner_join (cells with no obs automatically ## excluded) be_grid <- be_grid %>% inner_join(df_n_obs, by = c("CELLCODE" = "eea_cell_code") ) return(be_grid) } ``` Matthieu : ``` clean_data <- function(df,max_coord_uncertain,issues_to_discard,occurrenceStatus_to_discard){ #' STEP 2: data cleaning, remove imprecise or unverified observations, absences #' Remove data with coordinate uncertainty higher than 1000 meters df <- df %>% filter(coordinateUncertaintyInMeters < max_coord_uncertain | is.na(coordinateUncertaintyInMeters)) #' Remove data with some issues df <- df %>% filter(!issue %in% issues_to_discard) #' Remove obs linked to absences or exclusions df <- df %>% filter(!occurrenceStatus %in% occurrenceStatus_to_discard) return(df) } ``` ## INTERMEZZO ## Challenge 3 Hans: ``` visualize_obs_cells <- function(be_grid_data) { require(leaflet) require(htmltools) require(sf) ## STEP 6: make a leaflet heatmap pal <- colorBin(palette = "YlOrRd", domain = be_grid_data$n) labels <- sprintf( "EEA cell: %s<br/>Species: Harmonia axyridis<br/>Number of observations: %g", be_grid_data$CELLCODE, be_grid_data$n) %>% lapply(HTML) leaflet(st_transform(be_grid_data, crs = 4326)) %>% addTiles() %>% addPolygons( fillColor = ~pal(n), weight = 1, opacity = 1, color = "purple", fillOpacity = 0.7, highlightOptions = highlightOptions(weight = 2, color = "black", fillOpacity = 1, bringToFront = TRUE), label = labels, labelOptions = labelOptions( style = list("font-weight" = "normal", padding = "3px 8px"), textsize = "15px", direction = "auto")) %>% addLegend(title = "Harmonia axyridis (2010)<br>Number of observations", pal = pal, values = ~n) } make_report <- function(species, year, prefix) { df <- get_obs( species = species, year = year, prefix = prefix ) df <- clean_data(df = df, max_coord_uncertain = 1000) df <- assign_eea_cell( df = df, longitude_colname = "decimalLongitude" , latitude_colname = "decimalLatitude" ) df_n_obs <- get_n_obs_per_cell(df) be_grid_df_n_obs <- be_grid_n_obs(df_n_obs) leaflet_map <- visualize_obs_cells( be_grid_data = be_grid_df_n_obs) return( list( df = df, leaflet_map = leaflet_map ) ) } report <- make_report(species = "Harmonia axyridis", year = 2010, prefix = "20210624") report$df report$leaflet_map ``` ## Bonus challenge