# INBO CODING CLUB

26 September 2023

Welcome!

## Share your code snippet

If you want to share your code snippet, copy paste your snippet within a section of three backticks (```):

As an **example**:

```
library(tidyverse)
```

(*you can copy paste this example and add your code further down*)

## Yellow sticky notes

No yellow sticky notes online. Put your name + " | " and add a "*" each time you solve a challenge (see below).

## Participants

Name | Challenges
--- | ---
Damiano Oldoni |
Pieter Huybrechts | **
Adriaan Seynaeve |
Anja Leyman | **
Nele Mullens | *
Margot Vanhellemont | **

## Challenge 1

### Pieter's solution

```r
# 1.1 ---------------------------------------------------------------------

#' Read the 2010 observations of a species
#'
#' @param species Single string. Species name, e.g. `"Harmonia axyridis"`.
#'   It is lowercased and spaces are replaced by underscores to build the
#'   file name.
#'
#' @return The content of
#'   `data/20230926/20230926_<species>_2010.txt`, as read by
#'   `readr::read_tsv()`.
get_obs_2010 <- function(species) {
  assertthat::assert_that(assertthat::is.string(species))
  ## read observations -------------------------------------------------------
  # STEP 1: read observations
  species_formatted <- stringr::str_replace_all(
    tolower(species),
    pattern = " ",
    replacement = "_"
  )
  ## compose filename
  file_name <- paste0("20230926_", species_formatted, "_2010", ".txt")
  ## read file
  ha_2010_raw <- readr::read_tsv(file.path("data", "20230926", file_name),
    show_col_types = FALSE
  )
  return(ha_2010_raw)
}

# 1.2 ---------------------------------------------------------------------

#' Read the observations of a species for a given year
#'
#' @param species Single string. Species name, e.g. `"Harmonia axyridis"`.
#'   It is lowercased and spaces are replaced by underscores to build the
#'   file name.
#' @param year Single positive whole number. Year used in the file name.
#'
#' @return The content of
#'   `data/20230926/20230926_<species>_<year>.txt`, as read by
#'   `readr::read_tsv()`.
get_obs <- function(species, year) {
  assertthat::assert_that(assertthat::is.string(species))
  assertthat::assert_that(assertthat::is.count(year))
  ## read observations -------------------------------------------------------
  # STEP 1: read observations
  species_formatted <- stringr::str_replace_all(
    tolower(species),
    pattern = " ",
    replacement = "_"
  )
  ## compose filename
  file_name <- paste0("20230926_", species_formatted, "_", year, ".txt")
  ## read file
  ha_raw <- readr::read_tsv(file.path("data", "20230926", file_name),
    show_col_types = FALSE
  )
  return(ha_raw)
}
```

### Nele's solution

```
species_input <- "Harmonia axyridis"

get_obs_2010 <- function(species){
  species <- tolower(species)
  species <- str_replace_all(
    species,
    pattern = " ",
    replacement = "_"
  )
  file_name <- paste0("20230926_", species, "_2010", ".txt")
  ha_2010 <- read_tsv(paste0("./data/", file_name), show_col_types = FALSE)
  return(as.data.frame(ha_2010))
}

output_2010 <- get_obs_2010(species_input)
output_2010

get_obs <- function(species, year){
  species <- tolower(species)
  species <- str_replace_all(
    species,
    pattern = " ",
    replacement = "_"
  )
  file_name <- paste0("20230926_", species, "_", year, ".txt")
  datafile <- read_tsv(paste0("./data/", file_name), show_col_types = FALSE)
  return(as.data.frame(datafile))
}

input_sp <- "Harmonia axyridis"
input_yr <- "2010"
obs_2010_Ha <- get_obs(input_sp, input_yr)
obs_2010_Ha
```

## Challenge 2

### Pieter's solution

```r
# 2A ----------------------------------------------------------------------

#' Clean occurrence data
#'
#' Keeps the rows whose coordinate uncertainty is strictly below
#' `max_coord_uncertain` (or missing), and drops the rows whose `issue` or
#' `occurrenceStatus` is one of the values to discard.
#'
#' @param df data.frame with at least the columns
#'   `coordinateUncertaintyInMeters`, `issue` and `occurrenceStatus`.
#' @param max_coord_uncertain Single positive whole number. Max coordinate
#'   uncertainty in meters to filter on.
#' @param issues_to_discard Character vector. What GBIF issues should be
#'   discarded? List of allowed values via `rgbif::gbif_issues()$issue`.
#' @param occurrenceStatus_to_discard Character vector. What
#'   occurrenceStatuses should be discarded?
#'
#' @return `df` without the discarded rows.
#'
#' @examples
#' get_obs("Harmonia axyridis", year = 2010) |> clean_data()
clean_data <- function(df,
                       max_coord_uncertain = 1000,
                       issues_to_discard = c(
                         "ZERO_COORDINATE",
                         "COORDINATE_OUT_OF_RANGE",
                         "COORDINATE_INVALID",
                         "COUNTRY_COORDINATE_MISMATCH"
                       ),
                       occurrenceStatus_to_discard = c(
                         "absent",
                         "excluded"
                       )) {
  ## make sure our input variables are as we expect --------------------------
  # only allow our input variables to be of certain classes
  assertthat::assert_that(is.data.frame(df))
  assertthat::assert_that(assertthat::is.count(max_coord_uncertain))
  assertthat::assert_that(is.character(issues_to_discard))
  assertthat::assert_that(is.character(occurrenceStatus_to_discard))
  # the columns we filter on have to be present
  required_cols <- c(
    "coordinateUncertaintyInMeters", "issue", "occurrenceStatus"
  )
  assertthat::assert_that(
    all(required_cols %in% colnames(df)),
    msg = paste0(
      "`df` should contain the columns: ",
      paste(required_cols, collapse = ", ")
    )
  )
  # only allow certain values, which we get from a different package
  assertthat::assert_that(
    all(issues_to_discard %in% rgbif::gbif_issues()$issue),
    msg = "`issues_to_discard` should be one of the issues from `rgbif::gbif_issues()$issue`"
  )
  ## clean the input dataframe -----------------------------------------------
  # NOTE(review): `%in%` compares the complete value of `issue`. If a record
  # can list several issues in a single string, it is not discarded here;
  # confirm the format of the `issue` column.
  cleaned_df <- df |>
    dplyr::filter(
      coordinateUncertaintyInMeters < max_coord_uncertain |
        is.na(coordinateUncertaintyInMeters)
    ) |>
    dplyr::filter(!issue %in% issues_to_discard) |>
    dplyr::filter(!occurrenceStatus %in% occurrenceStatus_to_discard)
  return(cleaned_df)
}

#' Assign every observation to a grid cell
#'
#' @param df data.frame with the columns `decimalLongitude` and
#'   `decimalLatitude`.
#' @param lon_size_degrees Single positive number. Size of a cell along the
#'   longitude, in degrees.
#' @param lat_size_degrees Single positive number. Size of a cell along the
#'   latitude, in degrees.
#'
#' @return `df` with an extra character column `cell_code`:
#'   `<lon size>x<lat size>E<lon index>N<lat index>`, where the sizes are
#'   written without decimal separator and the indices are the coordinates
#'   divided by the cell size, rounded down. With the default sizes the code
#'   starts with `"01x005"`.
calc_grid_cell <- function(df,
                           lon_size_degrees = 0.1,
                           lat_size_degrees = 0.05) {
  # make sure the input dataframe is a dataframe, and has the right columns
  assertthat::assert_that(is.data.frame(df))
  assertthat::assert_that("decimalLongitude" %in% colnames(df))
  assertthat::assert_that("decimalLatitude" %in% colnames(df))
  # a cell size of zero or less would give infinite or mirrored indices
  assertthat::assert_that(
    assertthat::is.number(lon_size_degrees),
    lon_size_degrees > 0
  )
  assertthat::assert_that(
    assertthat::is.number(lat_size_degrees),
    lat_size_degrees > 0
  )
  # derive the size part of the code from the sizes actually used, so the
  # code stays correct for non default sizes: 0.1 -> "01", 0.05 -> "005"
  size_label <- function(size) {
    gsub(".", "", format(size, scientific = FALSE), fixed = TRUE)
  }
  cell_size_prefix <- paste0(
    size_label(lon_size_degrees), "x", size_label(lat_size_degrees)
  )
  # add a column with the cell code
  df_with_cell_code <- df |>
    dplyr::mutate(cell_code = paste0(
      cell_size_prefix,
      "E", floor(decimalLongitude / lon_size_degrees),
      "N", floor(decimalLatitude / lat_size_degrees)
    ))
  return(df_with_cell_code)
}

# 2B ----------------------------------------------------------------------

#' Count observations and individuals per grid cell
#'
#' @param df data.frame with the columns `cell_code` and `individualCount`.
#'
#' @return A data.frame with one row per `cell_code` and the columns
#'   `n_observations` (number of rows) and `n_individuals` (sum of
#'   `individualCount`).
calc_n_obs_ind <- function(df) {
  # NOTE(review): `sum()` is called without `na.rm`, so a cell with a missing
  # `individualCount` gets `NA` as `n_individuals`; confirm this is intended.
  df |>
    dplyr::group_by(cell_code) |>
    dplyr::summarise(
      # number of observations (rows)
      n_observations = dplyr::n(),
      # number of individuals
      n_individuals = sum(individualCount)
    )
}

#' Plot the distribution of the values per grid cell
#'
#' @param df data.frame with the columns `n` (values to bin) and `indicator`
#'   (used as fill, bars are placed side by side).
#' @param binwidth Width of the histogram bins, also shown in the x axis
#'   label.
#'
#' @return A ggplot object.
plot_distr_cells <- function(df, binwidth = 5) {
  p <- ggplot2::ggplot(df) +
    ggplot2::geom_histogram(
      ggplot2::aes(x = n, fill = indicator),
      position = "dodge",
      binwidth = binwidth
    ) +
    ggplot2::xlab(glue::glue("n (binwidth: {binwidth})")) +
    ggplot2::ggtitle(label = "Grid cells distribution")
  return(p)
}
```

## INTERMEZZO

## Challenge 3

## Bonus challenge

There is no bonus challenge!