INBO CODING CLUB

# INBO CODING CLUB

26 June 2025

Welcome!

## Share your code snippet

If you want to share your code snippet, copy paste your snippet within a section of three backticks (```):

As an **example**:

```
library(tidyverse)
```

(*you can copy paste this example and add your code further down*)

## Yellow sticky notes

No yellow sticky notes online. Put your name + " | " and add a "*" each time you solve a challenge (see below).

## Participants

Name | Challenges
--- | ---
Damiano Oldoni | ***
Emma Cartuyvels| ***
Pieter Huybrechts | \****
Jorre Vannieuwenhuyze |
Larissa Bonifacio |
Falk Mielke |
Lien Reyserhove |
Rhea Maesele |
Sebastiaan Verbesselt | *

## References

---

> *Falk:* I once introduced the basic concept of functions [here](https://mielke.ws/python_cursus/#/page/functions).
> (Content-wise same as Damiano's introduction, with a different target audience, metaphor, and programming language.)

---

style reference on the (non-)use of `return`: https://adv-r.hadley.nz/functions.html#implicit-versus-explicit-returns

---

## Challenge 0

### Jorre

```r
# Mix the ingredients for a bread dough.
make_bread <- function(grains, yeast, water, salt) {
  grains + yeast + water + salt
}

# Mix the ingredients for a focaccia dough (more yeast and salt, less water).
make_focaccia <- function(grains, yeast, water, salt) {
  grains + 1.5 * yeast + 0.7 * water + 2 * salt
}

# Prepare both doughs from the same ingredients and return them as a named list.
make_doughs <- function(grains, yeast, water, salt) {
  # Code to generate `bread` and `focaccia`
  bread <- make_bread(grains, yeast, water, salt)
  focaccia <- make_focaccia(grains, yeast, water, salt)
  # Combine bread and focaccia as a list of doughs
  list(bread = bread, focaccia = focaccia)
}
```

### Falk *(for Python aficionados)*

```python
# translation:
def make_doughs(grains: float, yeast: float, water: float, salt: float):
    # mix ingredients in a certain ratio to bake bread.
    bread = grains + yeast + water + salt
    focaccia = grains + 1.5 * yeast + 0.7 * water + 2 * salt
    return({"bread": bread, "focaccia": focaccia})

# let's make this more atomic!
all_ingredients = ["grains", "yeast", "water", "salt"]
ingredient_weights = {
    "bread": {ingredient: 1.0 for ingredient in all_ingredients},
    "focaccia": {"grains": 1.0, "yeast": 1.5, "water": 0.7, "salt": 2.0}
}

def make_dough(ingredients: dict, dough: str = "bread"):
    # choose a dough and prepare it from ingredients
    # (ingredients that are not provided count as 0.0)
    mixture = [
        ingredients.get(ingredient, 0.0) * ingredient_weights[dough][ingredient]
        for ingredient in ingredient_weights[dough].keys()]
    return(sum(mixture))

def make_bread(**kwargs):
    # exploiting Python "signature wildcards": e.g. https://codefather.tech/blog/python-args-kwargs
    return make_dough(ingredients = kwargs, dough = "bread")

# example:
make_bread(grains = 20, yeast = 1, water = 2, salt = 3)
```

### Larissa

```r
library(tidyverse)

make_bread <- function(grains, yeast, water, salt) {
  # Code to generate bread
  bread <- grains + yeast + water + salt
  return(bread)
}

make_focaccia <- function(grains, yeast, water, salt) {
  focaccia <- grains + 1.5 * yeast + 0.7 * water + 2 * salt
  return(focaccia)
}

# Combine two prepared doughs into a named list, so that the elements can be
# retrieved as `doughs$bread` and `doughs$focaccia`.
make_doughs <- function(bread, focaccia) {
  doughs <- list(bread = bread, focaccia = focaccia)
  return(doughs)
}

x <- make_bread(1, 2, 3, 4)
y <- make_focaccia(5, 6, 7, 8)
make_doughs(x, y)
```

### Sebastiaan

```r
library(tidyverse)

make_bread <- function(grains, yeast, water, salt) {
  # Code to generate bread
  bread <- grains + yeast + water + salt
  return(bread)
}

make_focaccia <- function(grains, yeast, water, salt) {
  focaccia <- grains + 1.5 * yeast + 0.7 * water + 2 * salt
  return(focaccia)
}

# Combine two prepared doughs into a named list, so that the elements can be
# retrieved as `doughs$bread` and `doughs$focaccia`.
make_doughs <- function(bread, focaccia) {
  doughs <- list(bread = bread, focaccia = focaccia)
  return(doughs)
}

x <- make_bread(1, 2, 3, 4)
y <- make_focaccia(5, 6, 7, 8)
make_doughs(x, y)
```

## Challenge 1

### Emma's solution

```r
# Read the moth observations of one year and add a `year` column derived from
# `eventDate`.
read_moth <- function(year) {
  file_name <- paste0("20250626_moth_obs_", year, ".csv")
  path <- file.path("data", "20250626", file_name)
  obs <- readr::read_csv(path)
  obs <- dplyr::mutate(obs, year = lubridate::year(.data$eventDate))
  return(obs)
}

# Yearly effort per location: number of distinct trap nights per deployment,
# summed over the deployments of each location.
# All dplyr calls are namespaced, so no `require(dplyr)` side effect is needed.
get_effort <- function(df) {
  effort_year <- df |>
    dplyr::group_by(year, locationID, deploymentID) |>
    dplyr::summarise(
      trap_nights = dplyr::n_distinct(eventDate),
      .groups = "drop_last"
    ) |>
    dplyr::summarise(
      effort = sum(trap_nights),
      .groups = "drop"
    )
  return(effort_year)
}

# Yearly abundance: sum of individual counts per year, location and species.
get_abundance <- function(df) {
  abundance_year <- df |>
    dplyr::group_by(year, locationID, species) |>
    dplyr::summarise(
      abundance = sum(individualCount),
      .groups = "drop"
    )
  return(abundance_year)
}

# Yearly richness: number of distinct species per year and location.
get_richness <- function(df) {
  richness_year <- df |>
    dplyr::group_by(year, locationID) |>
    dplyr::summarise(
      richness = dplyr::n_distinct(species),
      .groups = "drop"
    )
  return(richness_year)
}
```

### Pieter's solution

```r
#' Read Moth Observations
#'
#' This function reads the moth observation data of the requested year(s) from
#' `data/20250626/20250626_moth_obs_<year>.csv`.
#'
#' An extra column year is added based on the eventDate.
#'
#' @param filepath Not used to locate the file: the path is always built from
#'   `year`.
#' @param year Year for which the data is to be processed. Default is 2022.
#'
#' @return A data frame with the moth observations of the requested year(s),
#'   including a `year` column.
#'
#' @examples
#' read_moth(year = 2022)
#' read_moth(year = 2023)
#'
read_moth <- function(filepath, year = 2022) {
  # `filepath` is never used as a path (it is rebuilt from `year` below). A call
  # such as `read_moth(2023)` therefore used to read the default year silently;
  # treat a lone numeric first argument as the requested year instead.
  if (missing(year) && !missing(filepath) && is.numeric(filepath)) {
    year <- filepath
  }
  selected_year <- year

  # Read from file ----------------------------------------------------------
  filepath <- file.path(
    "data", "20250626",
    sprintf("20250626_moth_obs_%s.csv", selected_year)
  )
  df <- readr::read_delim(filepath, show_col_types = FALSE, progress = FALSE)

  # (optional) Add year column ----------------------------------------------
  if (!"year" %in% colnames(df)) {
    # Add `year` column based on `eventDate`
    df <- dplyr::mutate(df, year = lubridate::year(.data$eventDate))
  }

  # Filter and return df ----------------------------------------------------
  obs <- dplyr::filter(df, .data$year %in% selected_year)

  # Warn when a requested year has no observations in the file(s) read
  if (!all(selected_year %in% df$year)) {
    missing_years <- selected_year[!selected_year %in% df$year]
    cli::cli_warn(
      "No observations found for {length(missing_years)} year{?s}: {.field {missing_years}} in {.file {filepath}}."
    )
  }
  return(obs)
}

#' Calculate the total yearly effort
#'
#' Calculate the total yearly effort for each `locationID` as the total number of
#' trap days over all deploymentIDs linked to each `locationID`
#'
#' @param df A data frame containing moth observations.
#'
#' @return A data frame with total yearly effort per `year` and `locationID`.
#'
#' @examples
#' read_moth(year = 2023) |>
#'   get_effort()
get_effort <- function(df) {
  # Calculate effort ----
  # Calculate the total yearly effort for each `locationID` as the total number
  # of trap days over all deploymentIDs linked to each `locationID`
  effort_year <- df %>%
    dplyr::group_by(year, locationID, deploymentID) %>%
    dplyr::summarise(
      trap_nights = dplyr::n_distinct(eventDate),
      .groups = "drop_last"
    ) %>%
    dplyr::summarise(
      effort = sum(trap_nights),
      .groups = "drop"
    )

  # Return effort df ----
  return(effort_year)
}

#' Calculate yearly abundance
#'
#' Calculate yearly abundance as the sum of individual counts per species and
#' `locationID`.
#'
#' @param df A data frame containing moth observations.
#'
#' @return A data frame with yearly abundance per `locationID` and `species`.
#'
#' @examples
#' read_moth(year = 2022) |>
#'   get_abundance()
get_abundance <- function(df) {
  # Calculate yearly abundance as the sum of individual counts per species and
  # `locationID`
  dplyr::summarise(df,
    .by = dplyr::all_of(c("year", "locationID", "species")),
    abundance = sum(individualCount)
  )
}

#' Calculate species richness
#'
#' Calculate species richness per `locationID` as the number of unique species
#' observed in each year.
#'
#' @param df A data frame containing moth observations.
#'
#' @return A data frame with species richness per `locationID` and `year`.
#'
#' @examples
#' read_moth(year = 2022:2023) |>
#'   get_richness()
get_richness <- function(df) {
  # Calculate species richness per `locationID` as number of unique species
  # observed
  dplyr::summarise(df,
    richness = dplyr::n_distinct(species),
    .by = dplyr::all_of(c("year", "locationID"))
  )
}
```

### Jorre

`20250626_functions.R`:

```r
# Write a function called read_moth() that reads the moth data from a file and
# returns a data frame. The function should have an argument: year (number),
# which is the year of the data to read. Tip: sprintf() can be useful, e.g.
# sprintf("Damiano is born in %d.", 1982).

read_moth <- function(path, year) {
  file_name <- sprintf("20250626_moth_obs_%i.csv", year)
  readr::read_csv(file.path(path, file_name))
}

# Edna adds a year column immediately in her workflow. So, please add this small
# step in read_moth.

read_moth <- function(path, year) {
  file_name <- sprintf("20250626_moth_obs_%i.csv", year)
  readr::read_csv(file.path(path, file_name)) |>
    dplyr::mutate(year = lubridate::year(.data$eventDate))
}

# Write three functions: get_effort(), get_abundance() and get_richness() to
# calculate respectively the yearly effort, abundance and richness. The function
# must have an argument: df (data frame), which is the data to use.
# The function should return a data frame with the effort, abundance, or
# richness.

get_effort <- function(df) {
  df %>%
    dplyr::group_by(year, locationID, deploymentID) %>%
    dplyr::summarise(
      trap_nights = dplyr::n_distinct(eventDate),
      .groups = "drop_last"
    ) %>%
    dplyr::summarise(
      effort = sum(trap_nights),
      .groups = "drop"
    )
}

get_abundance <- function(df) {
  df %>%
    dplyr::group_by(year, locationID, species) %>%
    dplyr::summarise(
      abundance = sum(individualCount),
      .groups = "drop"
    )
}

get_richness <- function(df) {
  df %>%
    dplyr::group_by(year, locationID) %>%
    dplyr::summarise(
      richness = dplyr::n_distinct(species),
      .groups = "drop"
    )
}
```

`20250626_workflow.R`:

```r
obs <- read_moth("data/20250626", 2022)
effort_year <- get_effort(obs)
abundance_year <- get_abundance(obs)
richness_year <- get_richness(obs)
```

### Sebastiaan

`20250626_workflow.R`:

```r
library(tidyverse)
source("./src/20250626/20250626_functions.R")

# CHALLENGE 1 ####
obs <- read_moth(2022)
get_effort(obs)
get_abundance(obs)
get_richness(obs)
```

`20250626_functions.R`:

```r
library(tidyverse)
library(dplyr)

# CHALLENGE 1 ####

# Read the moth observations of one year and add a `year` column derived from
# `eventDate`.
read_moth <- function(year) {
  file_name_complete <- paste0("20250626_moth_obs_", year, ".csv")
  path <- file.path("data", "20250626", file_name_complete)
  obs <- readr::read_csv(path)
  obs <- obs %>%
    dplyr::mutate(year = lubridate::year(.data$eventDate))
  return(obs)
}

# Yearly effort per location: distinct trap nights per deployment, summed over
# the deployments of each location.
get_effort <- function(data) {
  effort_year <- data %>%
    dplyr::group_by(year, locationID, deploymentID) %>%
    dplyr::summarise(
      trap_nights = dplyr::n_distinct(eventDate),
      .groups = "drop_last"
    ) %>%
    dplyr::summarise(
      effort = sum(trap_nights),
      .groups = "drop"
    )
  return(effort_year)
}

# Yearly abundance: sum of individual counts per year, location and species.
get_abundance <- function(data) {
  abundance_year <- data %>%
    dplyr::group_by(year, locationID, species) %>%
    dplyr::summarise(
      abundance = sum(individualCount),
      .groups = "drop"
    )
  return(abundance_year)
}

# Yearly richness: number of distinct species per year and location.
get_richness <- function(data) {
  richness_year <- data %>%
    dplyr::group_by(year, locationID) %>%
    dplyr::summarise(
      richness = dplyr::n_distinct(species),
      .groups = "drop"
    )
  return(richness_year)
}
```

### Falk *(Python)*

#### part 1 and 2

```python
import pathlib as pl
import pandas as pd

def read_moth(file_name, base_path = pl.Path(".")/"data", **kwargs):
    # read a moth data file

    # combine the file path
    file_path = pl.Path(base_path)/file_name

    # read observations
    obs = pd.read_csv(file_path, **kwargs)

    # add a year column (should better use a date data type instead.)
    obs["year"] = [ev_date[:4] for ev_date in obs["eventDate"].values]

    # give back the observation data
    return (obs)


# we can also create a function which reads by "year" argument.
def read_moth_year(year, **kwargs):
    file_name = f"20250626_moth_obs_{year}.csv" # https://realpython.com/python-string-formatting
    return(read_moth(file_name, **kwargs))

# example:
obs = read_moth_year(2022, sep = ",")
print(obs.sample(5))
```

#### part 3

```python
# functions are "first class citizens" in R and python:
# you can make lists of them, pass them to other functions, etc.
# https://en.wikipedia.org/wiki/First-class_citizen
calculator_functions = {
    "effort": lambda data: data.groupby(["year", "locationID", "deploymentID"]).eventDate.nunique().groupby(level = [0, 1]).agg("sum").reset_index().rename(columns = {"eventDate": "effort"}),
    "abundance": lambda data: data.groupby(["year", "locationID", "species"]).individualCount.sum().reset_index().rename(columns = {"individualCount": "abundance"}),
    "richness": lambda data: data.groupby(["year", "locationID"]).species.nunique().reset_index().rename(columns = {"species": "richness"})
}
# (these one-line functions might be cumbersome, but you get the idea.)
# (in Python, `lambda args: [...]` is a shorthand for `def unnamed_function(args): [...]`.)

def calculate(measure, data):
    # look up the calculator function for `measure` and apply it to `data`
    return(calculator_functions[measure](data))

# example:
print(calculate("abundance", obs))
```

## Challenge 2

### Emma's solution

```r
# Yearly effort per location, plus a categorical version of it.
# `breaks` and `labels` are passed to `cut()`.
get_effort <- function(df,
                       breaks = c(-Inf, 9, 19, Inf),
                       labels = c("low", "medium", "high")) {
  effort_year <- df |>
    dplyr::group_by(year, locationID, deploymentID) |>
    dplyr::summarise(
      trap_nights = dplyr::n_distinct(eventDate),
      .groups = "drop_last"
    ) |>
    dplyr::summarise(
      effort = sum(trap_nights),
      .groups = "drop"
    ) |>
    dplyr::mutate(
      categoric_effort = cut(effort, breaks = breaks, labels = labels)
    )
  return(effort_year)
}

# Yearly abundance per location and species, plus a categorical version of it.
# `breaks` and `labels` are passed to `cut()`.
get_abundance <- function(df,
                          breaks = c(-Inf, 9, 49, Inf),
                          labels = c("low", "medium", "high")) {
  abundance_year <- df |>
    dplyr::group_by(year, locationID, species) |>
    dplyr::summarise(
      abundance = sum(individualCount),
      .groups = "drop"
    ) |>
    dplyr::mutate(
      categoric_abundance = cut(abundance, breaks = breaks, labels = labels)
    )
  return(abundance_year)
}

# Yearly richness per location, plus a categorical version of it.
# `breaks` and `labels` are passed to `cut()`.
get_richness <- function(df,
                         breaks = c(-Inf, 5, 10, Inf),
                         labels = c("low", "medium", "high")) {
  richness_year <- df |>
    dplyr::group_by(year, locationID) |>
    dplyr::summarise(
      richness = dplyr::n_distinct(species),
      .groups = "drop"
    ) |>
    dplyr::mutate(
      categories_richness = cut(richness, breaks = breaks, labels = labels)
    )
  return(richness_year)
}

# Challenge 2

# Bar plot of the abundance of one species by location.
# `df` is the output of `get_abundance()`, `species` the species to plot and
# `lng` the language of title and axis labels ("EN" or "NL").
plot_abundance <- function(df, species, lng = "EN") {
  # Inside `filter()` the name `species` refers to the column, so
  # `species == species` is always TRUE and nothing is filtered. Copy the
  # argument to a name that is not a column of `df`.
  selected_species <- species

  # Title and axis labels in the requested language
  if (lng == "EN") {
    plot_labels <- ggplot2::labs(
      title = paste0("Abundance of ", selected_species, " by Location"),
      x = "Location",
      y = "Abundance"
    )
  } else if (lng == "NL") {
    plot_labels <- ggplot2::labs(
      title = paste0("Abundantie van ", selected_species, " per Locatie"),
      x = "Locatie",
      y = "Abundantie"
    )
  } else {
    stop("Unknown language")
  }

  plot_abundance_year <- df |>
    dplyr::filter(species == selected_species) |>
    ggplot2::ggplot(
      ggplot2::aes(x = locationID, y = abundance, fill = categoric_abundance)
    ) +
    ggplot2::geom_col() +
    plot_labels
  return(plot_abundance_year)
}
```

### Sebastiaan's solution

`20250626_workflow.R`:

```r
get_effort(data = obs)
get_abundance(data = obs)
get_richness(data = obs)

# Plot abundance of Cossus cossus by location
language <- "Dutch"
plot_abundance(obs, language)

language <- "English"
plot_abundance(obs, language)

language <- "Spanish"
plot_abundance(obs, language)
```

`20250626_functions.R`:

```r
# Yearly effort per location, plus a categorical version of it.
# `breaks` and `labels` are passed to `cut()`.
get_effort <- function(data,
                       breaks = c(-Inf, 9, 19, Inf),
                       labels = c("low", "medium", "high")) {
  effort_year <- data %>%
    dplyr::group_by(year, locationID, deploymentID) %>%
    dplyr::summarise(
      trap_nights = dplyr::n_distinct(eventDate),
      .groups = "drop_last"
    ) %>%
    dplyr::summarise(
      effort = sum(trap_nights),
      .groups = "drop"
    )

  effort_year <- effort_year %>%
    dplyr::mutate(
      categoric_effort = cut(effort, breaks = breaks, labels = labels)
    )

  return(effort_year)
}

# Yearly abundance per location and species, plus a categorical version of it.
# `breaks` and `labels` are passed to `cut()`.
get_abundance <- function(data,
                          breaks = c(-Inf, 9, 49, Inf),
                          labels = c("low", "medium", "high")) {
  abundance_year <- data %>%
    dplyr::group_by(year, locationID, species) %>%
    dplyr::summarise(
      abundance = sum(individualCount),
      .groups = "drop"
    )

  abundance_year <- abundance_year %>%
    dplyr::mutate(
      categoric_abundance = cut(abundance, breaks = breaks, labels = labels)
    )
  return(abundance_year)
}

# Yearly richness per location, plus a categorical version of it.
# `breaks` and `labels` are passed to `cut()`.
get_richness <- function(data,
                         breaks = c(-Inf, 5, 10, Inf),
                         labels = c("low", "medium", "high")) {
  richness_year <- data %>%
    dplyr::group_by(year, locationID) %>%
    dplyr::summarise(
      richness = dplyr::n_distinct(species),
      .groups = "drop"
    )

  # `labels` must be an argument of `cut()`, not of `mutate()`
  richness_year <- richness_year %>%
    dplyr::mutate(
      categories_richness = cut(richness, breaks = breaks, labels = labels)
    )
  return(richness_year)
}

# Bar plot of the abundance of Cossus cossus by location, with title and axis
# labels in `language` ("English" or "Dutch"). For any other language a message
# is printed and no plot is made.
plot_abundance <- function(data, language) {
  abundance_year <- get_abundance(data)

  # Title and axis labels in the requested language; NULL if not available
  plot_labels <- switch(language,
    English = ggplot2::labs(
      title = "Abundance of Cossus cossus by Location",
      x = "Location",
      y = "Abundance"
    ),
    Dutch = ggplot2::labs(
      title = "Abundantie van Cossus cossus per Locatie",
      x = "Locatie",
      y = "Abundantie"
    )
  )

  if (is.null(plot_labels)) {
    print("plot in this language is not available")
  } else {
    abundance_year %>%
      dplyr::filter(species == "Cossus cossus") %>%
      ggplot2::ggplot(
        ggplot2::aes(x = locationID, y = abundance, fill = categoric_abundance)
      ) +
      ggplot2::geom_col() +
      plot_labels
  }
}
```

### Pieter's solution

```r
#' Calculate the total yearly effort
#'
#' Calculate the total yearly effort for each `locationID` as the total number of
#' trap days over all deploymentIDs linked to each `locationID`
#'
#' @param df A data frame containing moth observations.
#' @param breaks Numeric vector of cut points, passed to `cut()`.
#' @param labels Character vector of category labels, passed to `cut()`.
#'
#' @return A data frame with total yearly effort per `year` and `locationID`,
#'   and the effort category in `categoric_effort`.
#'
#' @examples
#' read_moth(year = 2023) |>
#'   get_effort()
get_effort <- function(df,
                       breaks = c(-Inf, 9, 19, Inf),
                       labels = c("low", "medium", "high")) {
  # Calculate effort ----
  # Calculate the total yearly effort for each `locationID` as the total number
  # of trap days over all deploymentIDs linked to each `locationID`
  effort_year <- df %>%
    dplyr::group_by(year, locationID, deploymentID) %>%
    dplyr::summarise(
      trap_nights = dplyr::n_distinct(eventDate),
      .groups = "drop_last"
    ) %>%
    dplyr::summarise(
      effort = sum(trap_nights),
      .groups = "drop"
    ) %>%
    ## Add breaks, labels ----
    # Use the arguments, not hard-coded copies of their default values
    dplyr::mutate(
      categoric_effort = cut(effort, breaks = breaks, labels = labels)
    )

  # Return effort df ----
  return(effort_year)
}

#' Calculate yearly abundance
#'
#' Calculate yearly abundance as the sum of individual counts per species and
#' `locationID`.
#'
#' @param df A data frame containing moth observations.
#'
#' @return A data frame with yearly abundance per `locationID` and `species`.
#'
#' @param breaks Numeric vector of cut points, passed to `cut()`.
#' @param labels Character vector of category labels, passed to `cut()`.
#'
#' @examples
#' read_moth(year = 2022) |>
#'   get_abundance()
get_abundance <- function(df,
                          breaks = c(-Inf, 9, 49, Inf),
                          labels = c("low", "medium", "high")) {
  # Calculate yearly abundance as the sum of individual counts per species and
  # `locationID`
  dplyr::summarise(df,
    .by = dplyr::all_of(c("year", "locationID", "species")),
    abundance = sum(individualCount)
  ) %>%
    # Use the arguments, not hard-coded copies of their default values
    dplyr::mutate(
      categoric_abundance = cut(abundance, breaks = breaks, labels = labels)
    )
}

#' Calculate species richness
#'
#' Calculate species richness per `locationID` as the number of unique species
#' observed in each year.
#'
#' @param df A data frame containing moth observations.
#' @param breaks Numeric vector of cut points, passed to `cut()`.
#' @param labels Character vector of category labels, passed to `cut()`.
#'
#' @return A data frame with species richness per `locationID` and `year`.
#'
#' @examples
#' read_moth(year = 2022:2023) |>
#'   get_richness()
get_richness <- function(df,
                         breaks = c(-Inf, 5, 10, Inf),
                         labels = c("low", "medium", "high")) {
  # Calculate species richness per `locationID` as number of unique species
  # observed
  dplyr::summarise(df,
    richness = dplyr::n_distinct(species),
    .by = dplyr::all_of(c("year", "locationID"))
  ) %>%
    # Use the arguments, not hard-coded copies of their default values
    dplyr::mutate(
      categories_richness = cut(richness, breaks = breaks, labels = labels)
    )
}

#' Plot Abundance of a Species
#'
#' @param df A data frame with the columns `species`, `locationID`, `abundance`
#'   and `categoric_abundance`.
#' @param species A character string specifying the species to plot. Default is
#'   "Cossus cossus".
#' @param language A character string specifying the language for the plot.
#'
#' @return A ggplot object showing the abundance of the specified species by
#'   location.
#'
#' @examples
#' read_moth(year = 2022) |>
#'   get_abundance() |>
#'   plot_abundance(species = "Cossus cossus", language = "english")
#'
#' read_moth(year = 2023) |>
#'   get_abundance() |>
#'   plot_abundance(species = "Chrysoteuchia culmella", language = "dutch")
plot_abundance <- function(df,
                           species = "Cossus cossus",
                           language = c("english", "dutch")) {
  # Validate `language` first, so an invalid value fails before any plotting
  language <- rlang::arg_match(language)
  # Copy to a name that is not a column of `df`, for use inside `filter()`
  selected_species <- species

  # Title and axis labels in the requested language ------------------------
  plot_labels <- switch(language,
    english = list(
      title = glue::glue("Abundance of {selected_species} by Location"),
      x = "Location",
      y = "Abundance"
    ),
    dutch = list(
      title = glue::glue("Abundantie van {selected_species} per Locatie"),
      x = "Locatie",
      y = "Abundantie"
    )
  )

  # Plot abundance of the selected species by location ----------------------
  # Only the requested plot is built, instead of both language versions
  df %>%
    dplyr::filter(species == selected_species) %>%
    ggplot2::ggplot(
      ggplot2::aes(x = locationID, y = abundance, fill = categoric_abundance)
    ) +
    ggplot2::geom_col() +
    do.call(ggplot2::labs, plot_labels)
}
```

### Jorre

`20250626_functions.R`:

```r
# Improve the functions get_effort(), get_abundance() and get_richness() by
# adding two arguments, breaks (numeric vector) and labels (character vector).
# Use the values provided by Edna as default values. The improved functions will
# return a data frame with a column more than the previous functions. This
# column is respectively called categoric_effort, categoric_abundance and
# categoric_richness.

get_effort <- function(
    df,
    breaks = c(-Inf, 9, 19, Inf),
    labels = c("low", "medium", "high")
) {
  df %>%
    dplyr::group_by(year, locationID, deploymentID) %>%
    dplyr::summarise(
      trap_nights = dplyr::n_distinct(eventDate),
      .groups = "drop_last"
    ) %>%
    dplyr::summarise(
      effort = sum(trap_nights),
      .groups = "drop"
    ) %>%
    dplyr::mutate(
      categoric_effort = cut(effort, breaks = breaks, labels = labels)
    )
}

get_abundance <- function(
    df,
    breaks = c(-Inf, 9, 49, Inf),
    labels = c("low", "medium", "high")
) {
  df %>%
    dplyr::group_by(year, locationID, species) %>%
    dplyr::summarise(
      abundance = sum(individualCount),
      .groups = "drop"
    ) %>%
    dplyr::mutate(
      categoric_abundance = cut(abundance, breaks = breaks, labels = labels)
    )
}

get_richness <- function(
    df,
    breaks = c(-Inf, 5, 10, Inf),
    labels = c("low", "medium", "high")
) {
  df %>%
    dplyr::group_by(year, locationID) %>%
    dplyr::summarise(
      richness = dplyr::n_distinct(species),
      .groups = "drop"
    ) %>%
    dplyr::mutate(
      categories_richness = cut(richness, breaks = breaks, labels = labels)
    )
}

# Write a function called plot_abundance() with an argument called df with a
# data frame containing abundance data (see output of get_abundance()) and an
# argument called species (character). The function returns a bar plot of the
# abundance with the provided species in the title.

plot_abundance <- function(df, thespecies) {
  df %>%
    dplyr::filter(species == thespecies) %>%
    ggplot2::ggplot(
      ggplot2::aes(x = locationID, y = abundance, fill = categoric_abundance)
    ) +
    ggplot2::geom_col() +
    ggplot2::labs(
      # Build the title from the provided species instead of hard-coding one
      title = paste0("Abundance of ", thespecies, " by Location"),
      x = "Location",
      y = "Abundance"
    )
}

# Edna must create the same plots with title and axis labels in Dutch for a
# workshop with citizen scientists. Can you improve plot_abundance() to handle
# it? You can use English title and labels as default values.

plot_abundance <- function(
    df,
    thespecies,
    # The default title follows the requested species (lazily evaluated)
    thetitle = paste0("Abundance of ", thespecies, " by Location"),
    xaxis_title = "Location",
    yaxis_title = "Abundance"
) {
  df %>%
    dplyr::filter(species == thespecies) %>%
    ggplot2::ggplot(
      ggplot2::aes(x = locationID, y = abundance, fill = categoric_abundance)
    ) +
    ggplot2::geom_col() +
    ggplot2::labs(title = thetitle, x = xaxis_title, y = yaxis_title)
}
```

`20250626_workflow.R`:

```r
effort_year <- get_effort(obs)
abundance_year <- get_abundance(obs)
richness_year <- get_richness(obs)

plot_abundance(
  abundance_year,
  "Cossus cossus",
  thetitle = "Abundantie van Cossus cossus per Locatie",
  xaxis_title = "Locatie",
  yaxis_title = "Abundantie"
)
```

### Falk *(Python)*

*(only part 1)*

```python
# ... in addition to the above:
import numpy as np

# defaults which vary by measure
default_breaks = {
    "effort": [-np.inf, 9, 19, np.inf],
    "abundance": [-np.inf, 9, 49, np.inf],
    "richness": [-np.inf, 5, 10, np.inf]
}

def categorize(calculation: pd.DataFrame, measure: str = None, breaks: list = None, labels: list = ["low", "medium", "high"]):
    # categorize the outcome of a calculation
    # (note: the category column is added to `calculation` in place)

    # if no column is provided explicitly, use the last one
    if measure is None:
        measure = calculation.columns[-1]

    # breaks default to the values from the dict above
    if breaks is None:
        breaks = default_breaks[measure]

    # `labels` have their default set in the function signature.
    labels = labels[:len(breaks)-1]

    # lots of safety checks are omitted:
    # - does the length of labels fit the breaks?
    # - is the measure really among the columns?
    # - are breaks well-defined? (numeric, increasing, ...)
    # but on the other hand, this keeps the function short and instructive.

    # calculation of categories
    calculation[f"categoric_{measure}"] = pd.cut(calculation[measure], bins = breaks, labels = labels)
    return(calculation)

# example:
categorize(calculate("richness", obs))
```

## Challenge 3

### Emma's solution

```r
# Bar plot of effort by location. `df` is the output of `get_effort()`, `lng`
# the language of title and axis labels ("EN" or "NL").
plot_effort <- function(df, lng = "EN") {
  if (lng == "NL") {
    plot_labels <- ggplot2::labs(
      title = "Inspanning per Locatie",
      x = "Locatie",
      y = "Inspanning"
    )
  } else if (lng == "EN") {
    plot_labels <- ggplot2::labs(
      title = "Effort by Location",
      x = "Location",
      y = "Effort"
    )
  } else {
    stop("Unknown language")
  }

  plot_effort_year <- ggplot2::ggplot(
    df,
    ggplot2::aes(x = locationID, y = effort, fill = categoric_effort)
  ) +
    ggplot2::geom_col() +
    plot_labels
  return(plot_effort_year)
}

# Bar plot of species richness by location. `df` is the output of
# `get_richness()`, `lng` the language of title and axis labels ("EN" or "NL").
plot_richness <- function(df, lng = "EN") {
  if (lng == "NL") {
    plot_labels <- ggplot2::labs(
      title = "Soortenrijkdom per Locatie",
      x = "Locatie",
      y = "Soortenrijkdom"
    )
  } else if (lng == "EN") {
    plot_labels <- ggplot2::labs(
      title = "Species Richness by Location",
      x = "Location",
      y = "Species Richness"
    )
  } else {
    stop("Unknown language")
  }

  plot_richness_year <- ggplot2::ggplot(
    df,
    ggplot2::aes(x = locationID, y = richness, fill = categories_richness)
  ) +
    ggplot2::geom_col() +
    plot_labels
  return(plot_richness_year)
}

# Read the observations of `year` and return a list with the three indicator
# data frames: `effort`, `abundance` and `richness`.
get_indicator <- function(year,
                          breaks_effort = c(-Inf, 9, 19, Inf),
                          labels_effort = c("low", "medium", "high"),
                          breaks_abundance = c(-Inf, 9, 49, Inf),
                          labels_abundance = c("low", "medium", "high"),
                          breaks_richness = c(-Inf, 5, 10, Inf),
                          labels_richness = c("low", "medium", "high")) {
  assertthat::assert_that(is.numeric(year))

  df <- read_moth(year)

  effort <- get_effort(df, breaks = breaks_effort, labels = labels_effort)
  abundance <- get_abundance(
    df,
    breaks = breaks_abundance,
    labels = labels_abundance
  )
  richness <- get_richness(
    df,
    breaks = breaks_richness,
    labels = labels_richness
  )

  return(list(effort = effort, abundance = abundance, richness = richness))
}
```

### Pieter's solution

```r
#' Plot Effort by Location
#'
#' @param df A data frame containing effort data from `get_effort`.
#' @param language A character string specifying the language for the plot. Options
#'   are "english" and "dutch". Default is "english".
#'
#' @return A ggplot object showing effort by location.
#'
#' @examples
#' read_moth(year = 2022) |>
#'   get_effort() |>
#'   plot_effort(language = "dutch")
plot_effort <- function(df, language = c("english", "dutch")) {
  # Title and axis labels in the requested language
  labels_in_language <- switch(rlang::arg_match(language),
    english = list(
      title = "Effort by Location",
      x = "Location",
      y = "Effort"
    ),
    dutch = list(
      title = "Effort per Locatie",
      x = "Locatie",
      y = "Effort"
    )
  )

  plot_effort_year <- ggplot2::ggplot(
    df,
    ggplot2::aes(
      x = locationID,
      y = effort,
      fill = categoric_effort
    )
  ) +
    ggplot2::geom_col() +
    do.call(ggplot2::labs, labels_in_language)

  return(plot_effort_year)
}

#' Plot Species Richness
#'
#' @param df A data frame containing species richness data from `get_richness`.
#' @param language A character string specifying the language for the plot. Options
#'   are "english" and "dutch". Default is "english".
#'
#' @return A ggplot object showing species richness by location.
#'
#' @examples
#' read_moth(year = 2022) |>
#'   get_richness() |>
#'   plot_richness(language = "english")
#'
#' read_moth(year = 2023) |>
#'   get_richness() |>
#'   plot_richness(language = "dutch")
plot_richness <- function(df, language = c("english", "dutch")) {
  # Title and axis labels in the requested language
  labels_in_language <- switch(rlang::arg_match(language),
    english = list(
      title = "Species Richness by Location",
      x = "Location",
      y = "Species Richness"
    ),
    dutch = list(
      title = "Soortenrijkdom per Locatie",
      x = "Locatie",
      y = "Soortenrijkdom"
    )
  )

  plot_richness_year <- ggplot2::ggplot(
    df,
    ggplot2::aes(
      x = locationID,
      y = richness,
      fill = categories_richness
    )
  ) +
    ggplot2::geom_col() +
    do.call(ggplot2::labs, labels_in_language)

  plot_richness_year
}

#' Get Indicators
#'
#' This function retrieves indicators for moth observations, including effort,
#' abundance, and richness, based on specified breaks and labels for each
#' indicator.
#'
#' @param year Year for which the indicators are to be calculated. Default is
#'   2022.
#' @param richness_breaks Breaks for categorizing species richness.
#' @param richness_labels Labels for categorizing species richness.
#' @param abundance_breaks Breaks for categorizing abundance.
#' @param abundance_labels Labels for categorizing abundance.
#' @param effort_breaks Breaks for categorizing effort.
#' @param effort_labels Labels for categorizing effort.
#'
#' @return A named list with three elements, `effort`, `abundance` and
#'   `richness`, holding the output of `get_effort()`, `get_abundance()` and
#'   `get_richness()` respectively.
#'
#' @examples
#' get_indicators(year = 2022)
get_indicators <- function(year = 2022,
                           richness_breaks = c(-Inf, 5, 10, Inf),
                           richness_labels = c("low", "medium", "high"),
                           abundance_breaks = c(-Inf, 9, 49, Inf),
                           abundance_labels = c("low", "medium", "high"),
                           effort_breaks = c(-Inf, 9, 19, Inf),
                           effort_labels = c("low", "medium", "high")) {
  # Pass `year` by name: the first argument of `read_moth()` is `filepath`, so
  # a positional call would leave `year` at its default value
  obs <- read_moth(year = year)
  list(
    effort = get_effort(obs, breaks = effort_breaks, labels = effort_labels),
    abundance = get_abundance(
      obs,
      breaks = abundance_breaks,
      labels = abundance_labels
    ),
    richness = get_richness(
      obs,
      breaks = richness_breaks,
      labels = richness_labels
    )
  )
}
```

### Jorre

`20250626_functions.R`:

```r
# Write two functions called plot_effort() and plot_richness() to return a bar
# plot of effort and richness, respectively. These functions must allow Edna to
# create plots with title and axis labels in Dutch as well. Again, English texts
# are the default values.

plot_effort <- function(
    df,
    thetitle = "Effort by Location",
    xaxis_title = "Location",
    yaxis_title = "Effort"
) {
  ggplot2::ggplot(
    df,
    ggplot2::aes(x = locationID, y = effort, fill = categoric_effort)
  ) +
    ggplot2::geom_col() +
    ggplot2::labs(title = thetitle, x = xaxis_title, y = yaxis_title)
}

plot_richness <- function(
    df,
    thetitle = "Species Richness by Location",
    xaxis_title = "Location",
    yaxis_title = "Species Richness"
) {
  ggplot2::ggplot(
    df,
    ggplot2::aes(x = locationID, y = richness, fill = categories_richness)
  ) +
    ggplot2::geom_col() +
    ggplot2::labs(title = thetitle, x = xaxis_title, y = yaxis_title)
}

# Automatise the entire workflow (without the plotting step) by creating a
# macrofunction called get_indicators() with argument year (numeric) to read the
# right csv file. This function must return a list of 3 data frames named
# effort, abundance and richness.
# And do not forget to allow Edna to set custom breaks and labels for the
# categorical effort/abundance/richness.

get_indicators <- function(
    path,
    year,
    effort_breaks = c(-Inf, 9, 19, Inf),
    effort_labels = c("low", "medium", "high"),
    abundance_breaks = c(-Inf, 9, 49, Inf),
    abundance_labels = c("low", "medium", "high"),
    richness_breaks = c(-Inf, 5, 10, Inf),
    richness_labels = c("low", "medium", "high")
) {
  obs <- read_moth(path, year)
  # The challenge asks for elements named effort, abundance and richness
  list(
    effort = get_effort(
      obs,
      breaks = effort_breaks,
      labels = effort_labels
    ),
    abundance = get_abundance(
      obs,
      breaks = abundance_breaks,
      labels = abundance_labels
    ),
    richness = get_richness(
      obs,
      breaks = richness_breaks,
      labels = richness_labels
    )
  )
}
```

`20250626_workflow.R`:

```r
plot_effort(
  effort_year,
  thetitle = "Effort by Location",
  xaxis_title = "Location",
  yaxis_title = "Effort"
)

plot_richness(
  richness_year,
  thetitle = "Species Richness by Location",
  xaxis_title = "Location",
  yaxis_title = "Species Richness"
)

get_indicators("data/20250626", 2022)
```

## Bonus Challenge

### Pieter's solution

```r
# Plot all indicators returned by `get_indicators()`.
# Returns a list with `effort_plot`, `abundance_plots` (one plot per species,
# named by species) and `richness_plot`, with texts in `language`.
plot_indicators <- function(indicators = get_indicators(),
                            language = c("english", "dutch")) {
  language <- rlang::arg_match(language)

  abundance <- purrr::chuck(indicators, "abundance")
  # The `species` column can repeat a species (e.g. once per location): keep
  # each species once, so that every species is plotted a single time
  species_names <- unique(purrr::chuck(abundance, "species"))

  list(
    effort_plot = plot_effort(purrr::chuck(indicators, "effort"), language),
    abundance_plots = purrr::map(
      purrr::set_names(species_names),
      \(species) plot_abundance(abundance, species, language = language)
    ),
    richness_plot = plot_richness(purrr::chuck(indicators, "richness"), language)
  )
}
```