INBO CODING CLUB

# INBO CODING CLUB 26 September 2024 Welcome! ## Share your code snippet If you want to share your code snippet, copy paste your snippet within a section of three backticks (```): As an **example**: ``` library(tidyverse) ``` (*you can copy paste this example and add your code further down*) ## Yellow sticky notes No yellow sticky notes online. Put your name + " | " and add a "*" each time you solve a challenge (see below). ## Participants Name | Challenges --- | --- Damiano Oldoni | *** Nele Mullens | ** Jorre Vannieuwenhuyze | Sanne Govaert | Pieter Huybrechts | *** Falk Mielke | Adriaan Seynaeve | ## Challenge 0 ### Cecilia: ```{r} make_bread <- function(grains, yeast, water, salt) { # Code to generate `bread` bread <- grains + yeast + water + salt return(bread) } make_focaccia <- function(grains, yeast, water, salt) { # Code to generate `focaccia` focaccia <- grains + 1.5 * yeast + 0.7 * water + 2 * salt return(focaccia) } make_doughs <- function(grains, yeast, water, salt) { # Function to generate `bread` bread <- make_bread(grains, yeast, water, salt) # Function to generate `bread` focaccia <- make_focaccia(grains, yeast, water, salt) doughs <- list(bread = bread, focaccia = focaccia) return(doughs) } ``` ### Thierry ``` make_doughs <- function(grains, yeast, water, salt) { doughs <- list( bread = make_bread( grains = grains, yeast = yeast, water = water, salt = salt ), focaccia = make_focaccia( grains = grains, yeast = yeast, water = water, salt = salt ) ) return(doughs) } make_bread <- function(grains, yeast, water, salt) { grains + yeast + water + salt } make_focaccia <- function(grains, yeast, water, salt) { grains + 1.5 * yeast + 0.7 * water + 2 * salt } ``` ### Falk ```{r} make_dough_making_function <- function(m1, m2, m3, m4) { function(grains, yeast, water, salt) { m1*grains + m2*yeast + m3*water + m4*salt } } make_bread <- make_dough_making_function(1, 1, 1, 1) make_focaccia <- make_dough_making_function(1, 1.5, 0.7, 2) make_doughs <- function(grains, yeast, water, salt) { # Combine bread and focaccia as a list of doughs doughs <- list(bread = make_bread(grains, yeast, water, salt), focaccia = make_focaccia(grains, yeast, water, salt)) return(doughs) } ``` ### Kaat ```r make_bread <- function(grains, yeast, water, salt) { # Code to generate `bread` bread <- grains + yeast + water + salt return(bread)} make_foccacia <- function(grains, yeast, water, salt) { # Code to generate `bread` focaccia <- grains + 1.5 * yeast + 0.7 * water + 2 * salt return(focaccia)} make_doughs <- function(grains, yeast, water, salt) { # Code to generate `bread` and `focaccia` bread <- make_bread(grains, yeast, water, salt) focaccia <- make_foccacia(grains, yeast, water, salt) # Combine bread and focaccia as a list of doughs doughs <- list(bread = bread, focaccia = focaccia) return(doughs) } ``` ## Robrecht ```r make_doughs <- function(grains, yeast, water, salt, output = "both"){ if(output %in% c("both", "bread", "foccacia")){ bread <- grains + yeast + water + salt foccacia <- grains + 1.5 * yeast + 0.7 * water + 2 * salt doughs <- list(bread = bread, foccacia = foccacia) ifelse(output == "bread", return(doughs$bread), ifelse(output == "foccacio",return(doughs$foccacia), return(doughs))) } else{ return("please provide output in the form as 'both', 'bread' or 'foccacia'") } } ``` ## Challenge 1 ### Damiano's solution (example) Copy paste this section to show your solutions. ```r # dummy code print("This is how to insert code.") ``` ### Jorre ```r get_obs_2010 <- function(species){ # Set scientific name to lowercase species <- tolower(species) # Replace spaces with underscores species <- str_replace_all( species, pattern = " ", replacement = "_" ) # Compose filename file_name <- paste0("20240926_", species, "_2010", ".txt") # Read file ha_2010 <- read_tsv(paste0("./data/20240926/",file_name)) return(ha_2010) } get_obs <- function(speciesm,year){ # Set scientific name to lowercase species <- tolower(species) # Replace spaces with underscores species <- str_replace_all( species, pattern = " ", replacement = "_" ) # Compose filename file_name <- paste0("20240926_", species, "_",year, ".txt") # Read file ha <- read_tsv(paste0("./data/20240926/",file_name)) return(ha) } ``` ### Kaat ``` get_obs_2010 <- function (species){ # Set scientific name to lowercase species <- tolower(species) # Replace spaces with underscores species <- str_replace_all( species, pattern = " ", replacement = "_" ) # Compose filename file_name <- paste0("20240926_", species, "_2010", ".txt") # Read file ha_2010 <- read_tsv(paste0("./data/20240926/", file_name)) return(ha_2010) } get_obs <- function (species, year){ # Set scientific name to lowercase species <- tolower(species) # Replace spaces with underscores species <- str_replace_all( species, pattern = " ", replacement = "_" ) # Compose filename file_name <- paste0("20240926_", species,"_", year, ".txt") # Read file ha <- read_tsv(paste0("./data/20240926/", file_name)) return(ha) } ``` ### Nele ```r get_obs_2010 <- function(species) { species <- tolower(species) species <- str_replace_all( species, pattern = " ", replacement = "_" ) file_name <- paste0("20240926_", species, "_2010", ".txt") ha_2010 <- read_tsv(paste0("./data/20240926/", file_name)) return(as.data.frame(ha_2010)) } df_H_axyridis <- get_obs_2010("Harmonia axyridis") get_obs <- function(species, year) { species <- tolower(species) species <- str_replace_all( species, pattern = " ", replacement = "_" ) file_name <- paste0("20240926_", species, "_", year, ".txt") dataset_obs <- read_tsv(paste0("./data/20240926/", file_name)) return(as.data.frame(dataset_obs)) } df_H_axyridis <- get_obs("Harmonia axyridis", "2011") ``` ### Thierry ``` read_data <- function(species = "Harmonia axyridis", year = 2010, root = ".") { stopifnot(require(assertthat), require(readr)) assert_that( is.string(species), noNA(species), is.string(root), noNA(root), file_test("-d", root), is.count(year) ) tolower(species) |> gsub(pattern = " ", replacement = "_") |> sprintf(fmt = "20240926_%s_%i.txt", year) -> filename file.path(root, filename) |> read_tsv() } ``` ### Sanne's solution ```r # Challenge 1.1 get_obs_2010 <- function(species) { # Set scientific name to lowercase and replace spaces with underscores species <- tolower(species) %>% stringr::str_replace_all(pattern = " ", replacement = "_") # Compose filename file_name <- paste0("20240926_", species, "_2010", ".txt") # Read file observations_2010 <- readr::read_tsv(paste0("./data/20240926/", file_name)) return(observations_2010) } # Challenge 1.2 get_obs <- function(species, year) { # Set scientific name to lowercase and replace spaces with underscores species <- tolower(species) %>% stringr::str_replace_all(pattern = " ", replacement = "_") # Compose filename file_name <- paste0("20240926_", species, "_", year, ".txt") # Read file observations <- readr::read_tsv(paste0("./data/20240926/", file_name)) return(observations) } ``` ### Cecilia ```{r} # Define functions get_obs_2010 <- function(species) { # Set scientific name to lowercase species <- tolower(species) # Replace spaces with underscores species <- str_replace_all( species, pattern = " ", replacement = "_" ) # Compose filename file_name <- paste0("20240926_", species, "_2010", ".txt") # Read file ha_2010 <- read_tsv(paste0("./data/20240926/", file_name)) return(ha_2010) } get_obs <- function(species, year) { # Set scientific name to lowercase species <- tolower(species) # Replace spaces with underscores species <- str_replace_all( species, pattern = " ", replacement = "_" ) # Compose filename file_name <- paste0("20240926_", species, "_", year, ".txt") # Read file ha <- read_tsv(paste0("./data/20240926/", file_name)) return(ha) } # Define variable species <- "Harmonia axyridis" year <- "2011" # Call function related ha_2010 <- get_obs_2010(species) ha <- get_obs(species, year) ``` ### Robrecht ``` get_obs_2010 <- function(...){ files <- list.files(pattern = ".*.txt") all_data <- c() for(file in files){ data <- read.delim(file, header = TRUE) all_data <- merge(all_data, data, all = TRUE) } return(all_data) } data <- get_obs_2010() ``` ### Falk ```r get_obs <- function(year, species) { # Set scientific name to lowercase fn_species <- tolower(species) # Replace spaces with underscores fn_species <- stringr::str_replace_all(fn_species, pattern = " ", replacement = "_") # check year input if (!is.character(year)) { fn_year <- paste0(year) } else { fn_year <- year } # get current folder fn_date <- "20240926" # format(lubridate::today(), format = "%Y%m%d") file_name <- paste0("./data/", fn_date, "/", fn_date, "_", fn_species, "_", fn_year, ".txt" ) # Read file data <- readr::read_tsv(file_name) return(data) } get_obs(2024, "Harmonia axyridis") ``` ## Challenge 2 ### Thierry ``` clean_data <- function( data, max_coord_uncertain = 1000, issues_to_discard = c( "ZERO_COORDINATE", "COORDINATE_OUT_OF_RANGE", "COORDINATE_INVALID", "COUNTRY_COORDINATE_MISMATCH" ), occurrenceStatus_to_discard = c("absent", "excluded") ) { stopifnot(require(assertthat), require(tidyverse)) assert_that( inherits(data, "data.frame"), has_name(data, "issue"), has_name(data, "occurrenceStatus"), has_name(data, "coordinateUncertaintyInMeters"), is.number(max_coord_uncertain), noNA(max_coord_uncertain) ) issues_to_discard <- match.arg(issues_to_discard, several.ok = TRUE) occurrenceStatus_to_discard <- match.arg( occurrenceStatus_to_discard, several.ok = TRUE ) data |> filter( .data$coordinateUncertaintyInMeters < max_coord_uncertain | is.na(.data$coordinateUncertaintyInMeters), !.data$issue %in% issues_to_discard, !.data$occurrenceStatus %in% occurrenceStatus_to_discard ) } calc_grid_cell <- function(df, lon_step = 0.1, lat_step = 0.05) { stopifnot(require(assertthat), require(tidyverse)) assert_that( inherits(df, "data.frame"), has_name(df, "decimalLongitude"), has_name(df, "decimalLongitude"), is.number(lon_step), noNA(lon_step), is.number(lat_step), noNA(lat_step) ) df |> mutate( cell_code = sprintf( fmt = "01x005E%iN%i", floor(.data$decimalLongitude / lon_step), floor(.data$decimalLatitude / lat_step)) ) } calc_grid_cell_base <- function( df, lon = "decimalLongitude", lat = "decimalLatitude", lon_step = 0.1, lat_step = 0.05 ) { stopifnot(require(assertthat)) assert_that( inherits(df, "data.frame"), has_name(df, lat), has_name(df, lon), is.number(lon_step), noNA(lon_step), is.number(lat_step), noNA(lat_step) ) df$cell_code <- sprintf( fmt = "01x005E%iN%i", floor(df[, lon] / lon_step), floor(df[, lat] / lat_step) ) } calc_n_obs_ind <- function(df) { stopifnot(require(assertthat), require(tidyverse)) assert_that( inherits(df, "data.frame"), has_name(df, "cell_code"), has_name(df, "individualCount") ) df |> group_by(.data$cell_code) %>% summarise( n_observations = n(), n_individuals = sum(.data$individualCount) ) } plot_distr_cells <- function(df, binwidth = 5) { stopifnot(require(assertthat), require(tidyverse)) assert_that( inherits(df, "data.frame"), has_name(df, "n_observations"), has_name(df, "n_individuals"), is.number(binwidth), noNA(binwidth), binwidth > 0 ) df |> pivot_longer( cols = c("n_observations", "n_individuals"), values_to = "n", names_to = "indicator", names_pattern = "n_?(.*)" ) |> ggplot() + geom_histogram( aes(x = n, fill = indicator), position = "dodge", binwidth = binwidth ) + xlab("n (binwidth: 5)") + ggtitle(label = "Grid cells distribution") } ``` ### Cecilia ```{r} clean_data <- function(df, max_coord_uncertain = 1000, issues_to_discard = c( "ZERO_COORDINATE", "COORDINATE_OUT_OF_RANGE", "COORDINATE_INVALID", "COUNTRY_COORDINATE_MISMATCH"), occurrenceStatus_to_discard = c( "absent", "excluded") ) { ## STEP 2: data cleaning # Remove observations with coordinate uncertainty higher than 1000 meters df <- df %>% filter(coordinateUncertaintyInMeters < max_coord_uncertain | is.na(coordinateUncertaintyInMeters)) # Remove data with some geographic issues df <- df %>% filter(!issue %in% issues_to_discard) # Remove absences df <- df %>% filter(!occurrenceStatus %in% occurrenceStatus_to_discard) return(df) } calc_grid_cell <- function(df, lon = 0.1, lan = 0.05) { ## STEP 3: get the grid cell code each observation belongs to and add it to ## the data.frame. We use grid cells of 0.1 lon degrees x 0.05 lat degrees df <- df %>% mutate(cell_code = paste0( "01x005", "E", floor(decimalLongitude/lan), # cell size "N", floor(decimalLatitude/lon))) return(df) } ha_clean <- clean_data(ha) ha_clean_extra_col <- calc_grid_cell(ha_clean) ``` ### Nele ```{r} #2.1: clean_data(): clean_data <- function(obs_data) { obs_dat <- obs_data %>% filter(coordinateUncertaintyInMeters < 1000 | is.na(coordinateUncertaintyInMeters)) obs_dat %>% filter(!issue %in% c( "ZERO_COORDINATE", "COORDINATE_OUT_OF_RANGE", "COORDINATE_INVALID", "COUNTRY_COORDINATE_MISMATCH" )) obs_dat <- obs_dat %>% filter(!occurrenceStatus %in% c( "absent", "excluded" )) } clean_H_axyridis <- clean_data(df_H_axyridis) #2.2: calc_grid_cell(): calc_grid_cell <- function(clean_data, name_long = "decimalLongitude", name_lat = "decimalLatitude",lon = 0.1, lat = 0.05) { clean_data <- clean_data %>% mutate(cell_code = paste0( "01x005", "E", floor(.data[[name_long]]/lon), #not just use name_long, because R will expect a colname names name_long "N", floor(.data[[name_lat]]/lat))) } grid_H_axyridis <- calc_grid_cell(clean_H_axyridis) #2.3: calc_n_obs_ind(): calc_n_obs_ind <- function(dataframe_obs) { n_obs <- dataframe_obs %>% group_by(cell_code) %>% summarise(n_observations = n(), # number of observations (rows) n_individuals = sum(individualCount)) # number of individuals } obs_H_axyridis <- calc_n_obs_ind(grid_H_axyridis) #2.4: plot_distr_cells(): plot_distr_cells <- function (obs_count, binsize = 5) { n_obs <- obs_count %>% tidyr::pivot_longer(cols = c(n_observations, n_individuals), values_to = "n", names_to = "indicator", names_pattern = "n_?(.*)" ) p <- ggplot(n_obs) + geom_histogram(aes(x = n, fill = indicator), position = "dodge", binwidth = binsize) + xlab("n (binwidth: 5)") + ggtitle(label = "Grid cells distribution") p return(p) } plot_distr_cells(obs_H_axyridis, 10) ``` ### Jorre ``` clean_data <- function( df, max_coord_uncertain=1000, issues_to_discard=c( "ZERO_COORDINATE", "COORDINATE_OUT_OF_RANGE", "COORDINATE_INVALID", "COUNTRY_COORDINATE_MISMATCH" ), occurrenceStatus_to_discard=c( "absent", "excluded" ) ){ stopifnot(require(assertthat), require(tidyverse)) assert_that( is.data.frame(df), is.numeric(max_coord_uncertain), max_coord_uncertain>=0, is.character(issues_to_discard), is.character(occurrenceStatus_to_discard) ) df |> filter( coordinateUncertaintyInMeters < max_coord_uncertain | is.na(coordinateUncertaintyInMeters) ) |> filter(!issue %in% issues_to_discard) |> filter(!occurrenceStatus %in% occurrenceStatus_to_discard) } calc_grid_cell <- function(df, lon=.1,lat=.05, lonvar='decimalLongitude',latvar='decimalLatitude' ){ stopifnot(require(assertthat), require(tidyverse)) assert_that( is.data.frame(df), is.numeric(lon), is.numeric(lat), is.character(lonvar),is.character(latvar) ) df |> mutate( cell_code = paste0( "01x005", "E", floor(!!sym(lonvar)/lon), "N", floor(!!sym(latvar)/lat) ) ) } ``` ### Falk ```r clean_data <- function(df, max_coord_uncertain = 1000, issues_to_discard = c("ZERO_COORDINATE", "COORDINATE_OUT_OF_RANGE", "COORDINATE_INVALID", "COUNTRY_COORDINATE_MISMATCH" ), occurrenceStatus_to_discard = c("absent", "excluded") ) { # Remove observations with coordinate uncertainty higher than max_coord_uncertain df <- df %>% filter(coordinateUncertaintyInMeters < max_coord_uncertain , !is.na(coordinateUncertaintyInMeters) ) # Remove data with some geographic issues df <- df %>% filter(!issue %in% issues_to_discard) # Remove absences df <- df %>% filter(!occurrenceStatus %in% occurrenceStatus_to_discard) return(df) } ``` *(I got distracted afterwards...)* ### Emma's solution ```r clean_data <- function(df, max_coord_uncertain = 1000, issues_to_discard = c( "ZERO_COORDINATE", "COORDINATE_OUT_OF_RANGE", "COORDINATE_INVALID", "COUNTRY_COORDINATE_MISMATCH" ), occurrenceStatus_to_discard = c( "absent", "excluded" )){ # Remove observations with coordinate uncertainty higher than 1000 meters data_clean <- df %>% filter(coordinateUncertaintyInMeters < max_coord_uncertain | is.na(coordinateUncertaintyInMeters)) # Remove data with some geographic issues data_clean <- data_clean %>% filter(!issue %in% issues_to_discard) # Remove absences data_clean <- data_clean %>% filter(!occurrenceStatus %in% occurrenceStatus_to_discard) return(data_clean) } calc_grid_cell <- function(df, name_long = "decimalLongitude", name_lat = "decimalLatitude", lat = 0.05, lon = 0.1){ df_grid <- df %>% mutate(cell_code = paste0( "01x005", "E", floor(get(name_long)/lon), "N", floor(get(name_lat)/lat))) return(df_grid) } calc_n_obs_ind <- function(df){ n_obs_ind <- df %>% group_by(cell_code) %>% summarise(n_observations = n(), # number of observations (rows) n_individuals = sum(individualCount)) # number of individuals return(n_obs_ind) } plot_distr_cells <- function(df, binwidth = 5){ n_obs_ind_ha <- df %>% tidyr::pivot_longer(cols = c(n_observations, n_individuals), values_to = "n", names_to = "indicator", names_pattern = "n_?(.*)" ) p <- ggplot(n_obs_ind_ha) + geom_histogram(aes(x = n, fill = indicator), position = "dodge", binwidth = binwidth) + xlab(paste0("n (binwidth: ", binwidth, ")")) + ggtitle(label = "Grid cells distribution") return(p) } ha_2010 <- get_obs("Harmonia axyridis", 2010) cleaned_ha_2010 <- clean_data(ha_2010) grid_ha_2010 <- calc_grid_cell(cleaned_ha_2010) n_obs_ha_2010 <- calc_n_obs_ind(grid_ha_2010) plot_distr_cells(n_obs_ha_2010) ``` ### Pieter ```r #' Data Cleaning #' #' @param df data.frame with observations #' @param max_coord_uncertain maximum of coordinateUncertaintyInMeters allowed (numeric). #' @param issues_to_discard issues whose obs have to be removed (character). #' @param occurrenceStatus_to_discard the occurrenceStatus values whose obs have to be removed (character). #' #' @return data.frame with the cleaned observations #' #' @examples clean_data(df = get_obs(species = "Harmonia axyridis", year = 2011)) clean_data <- function(df, max_coord_uncertain = 1000, issues_to_discard = c( "ZERO_COORDINATE", "COORDINATE_OUT_OF_RANGE", "COORDINATE_INVALID", "COUNTRY_COORDINATE_MISMATCH" ), occurrenceStatus_to_discard = c( "absent", "excluded" )) { # Check if all the required columns are present in the input data.frame assertthat::assert_that(assertthat::has_name(df, "coordinateUncertaintyInMeters")) assertthat::assert_that(assertthat::has_name(df, "issue")) assertthat::assert_that(assertthat::has_name(df, "occurrenceStatus")) # Remove observations with coordinate uncertainty higher than the maximum # allowed value if (!missing(max_coord_uncertain)) { df <- dplyr::filter(df, coordinateUncertaintyInMeters < max_coord_uncertain) %>% dplyr::filter(!is.na(coordinateUncertaintyInMeters)) } # Remove data with some geographic issues if (!missing(issues_to_discard)) { df <- dplyr::filter(df, !issue %in% issues_to_discard) } # Remove absences if (!missing(occurrenceStatus_to_discard)) { df <- dplyr::filter(df, !occurrenceStatus %in% occurrenceStatus_to_discard) } # Return cleaned data.frame return(df) } #' Calculate grid cell code based on decimal longitude and latitude #' #' This function calculates the grid cell code for each row in a data frame based on the decimal longitude and latitude values. #' The grid cell code is generated by dividing the longitude and latitude values by the specified cell sizes and concatenating them with the grid cell code prefix. #' #' @param df The input data frame. #' @param decimalLongitude The name of the column containing the decimal longitude values. #' @param decimalLatitude The name of the column containing the decimal latitude values. #' @param cell_size_E The size of the grid cells in the east-west direction. #' @param cell_size_N The size of the grid cells in the north-south direction. #' #' @return The input data frame with an additional column "cell_code" containing the grid cell codes. calculate_grid_cell_code <- function(df, decimalLongitude, decimalLatitude, cell_size_E, cell_size_N) { # Function implementation goes here # ... # ... } # Usage example: # df <- data.frame(decimalLongitude = c(1.234, 2.345, 3.456), # decimalLatitude = c(4.567, 5.678, 6.789)) # df <- calculate_grid_cell_code(df, "decimalLongitude", "decimalLatitude", 0.1, 0.1) #' #' @examples #' df <- data.frame(decimalLongitude = c(10.5, 11.2, 12.7), #' decimalLatitude = c(50.2, 51.8, 52.3)) #' calc_grid_cell(df) #' # Output: #' # decimalLongitude decimalLatitude cell_code #' # 1 10.5 50.2 01x005E210N100 #' # 2 11.2 51.8 01x005E224N103 #' # 3 12.7 52.3 01x005E254N104 calc_grid_cell <- function(df, decimalLongitude = "decimalLongitude", decimalLatitude = "decimalLatitude", cell_size_E = 0.1, cell_size_N = 0.05){ dplyr::mutate(df, cell_code = paste0( "01x005", "E", floor(.data[[decimalLongitude]]/cell_size_E), "N", floor(.data[[decimalLatitude]]/cell_size_N))) } #' Calculate the number of observations and individuals per cell code #' #' This function takes a data frame as input and calculates the number of observations #' (rows) and the number of individuals per cell code. It groups the data frame by the #' cell code and then calculates the counts using the `n()` and `sum()` functions. #' #' @param df The input data frame #' @return A data frame with the number of observations and individuals per cell code #' #' @examples #' df <- data.frame(cell_code = c("A", "A", "B", "B"), #' individualCount = c(1, 2, 3, 4)) #' calc_n_obs_ind(df) #' calc_n_obs_ind <- function(df){ df %>% group_by(cell_code) %>% summarise(n_observations = n(), # number of observations (rows) n_individuals = sum(individualCount)) # number of individuals } #' Plot Distribution of Cells #' #' This function takes a data frame and a width parameter as input and plots the distribution of grid cells. #' #' @param df The data frame containing the data for the plot. #' @param width The width parameter for the histogram binwidth. #' #' @return Plot of the distribution of grid cells. #' #' @examples #' df <- data.frame(n_observations = c(10, 20, 30), #' n_individuals = c(5, 10, 15)) #' plot_distr_cells(df, 5) #' plot_distr_cells <- function(df, width){ # Prepare the data for the plot n_obs_ind <- df %>% tidyr::pivot_longer(cols = c(n_observations, n_individuals), values_to = "n", names_to = "indicator", names_pattern = "n_?(.*)" ) # Create the plot p <- ggplot(n_obs_ind) + geom_histogram(aes(x = n, fill = indicator), position = "dodge", binwidth = width) + xlab(sprintf("n (binwidth: %s)", width)) + ggtitle(label = "Grid cells distribution") # Return the plot return(p) } ``` ## Challenge 3 ### Emma's solution ```r analyse_obs <- function(species, year, ...){ ha_2010 <- get_obs(species, year) cleaned_ha_2010 <- clean_data(ha_2010, ...) grid_ha_2010 <- calc_grid_cell(cleaned_ha_2010) n_obs_ha_2010 <- calc_n_obs_ind(grid_ha_2010) p <- plot_distr_cells(n_obs_ha_2010) return(list(data = n_obs_ha_2010, plot = p)) } ``` ### Pieter ```r analyse_obs <- function(species, year, max_coord_uncertain = 1000, issues_to_discard = c( "ZERO_COORDINATE", "COORDINATE_OUT_OF_RANGE", "COORDINATE_INVALID", "COUNTRY_COORDINATE_MISMATCH" ), occurrenceStatus_to_discard = c( "absent", "excluded" ), decimalLongitude = "decimalLongitude", decimalLatitude = "decimalLatitude", cell_size_E = 0.1, cell_size_N = 0.05, width = 10) { n_obs_ind <- get_obs(species, year) %>% clean_data(max_coord_uncertain, issues_to_discard, occurrenceStatus_to_discard) %>% calc_grid_cell(decimalLongitude, decimalLatitude, cell_size_E, cell_size_N) %>% calc_n_obs_ind() plot <- plot_distr_cells(n_obs_ind, width) return(list(n_obs_ind = n_obs_ind, plot = plot)) } ``` ```r ana_result <- analyse_obs( species = "Chorthippus biguttulus", year = 2010, max_coord_uncertain = 5000, decimalLatitude = "latitude", decimalLongitude = "longitude", width = 5 ) ana_result$n_obs_ind ana_result$plot analyse_obs( species = "Harmonia axyridis", year = 2011 )$plot ``` ## Notes ### Example of data masking using tidyselect ```r my_select <- function(df, cols_to_select){ dplyr::select(df, dplyr::all_of(cols_to_select)) } my_select(population, c("country", "year")) ` ```