INBO CODING CLUB

# INBO CODING CLUB

29 April 2021

Welcome!

## Share your code snippet

If you want to share your code snippet, copy paste your snippet within a section of three backticks (```):

As an **example**:

```
library(tidyverse)
```

(*you can copy paste this example and add your code further down*)

## Progress

Put your name + " | " and add a "*" each time you solve a challenge (see below).

## Participants

Name | Group | Challenges
--- | --- | ---
Damiano Oldoni | |
Hans Van Calster |1|*
Patrik Oosterlynck | |
Joost Vanoverbeke |4|**
Suzanna Lettens |1|**
Lisse Goris |4|***
Amber Mertens |2|**
Loïc van Doorn |2|
Dirk Maes |1| **
Lucia Manzanares | |
Tom De Dobbelaer |3 |**
Leen Govaere |3| *
Raïsa Carmen |2|**
Jasmijn Hillaert |1|*
Adriaan Seynaeve | |*
Vincent Smeekens | |
Anja Leyman |1|***
Lynn Pallemaerts | 4 | **

## Challenge 1

Hans

```r
library(tidyverse)

# load aquatic trap data
df <- read_csv("./data/20210429/20210429_aquatictrap_data.csv")

# Display the columns id and weight_total
df %>%
  select(id, weight_total)

# Display the distinct values of location_id and location_name
df %>%
  distinct(location_id, location_name)
df %>%
  distinct(location_id)
df %>%
  distinct(location_name)

# Display data where both weight and weight_total are missing
df %>%
  filter(is.na(weight), is.na(weight_total))

# Order the rows by number of trapped individuals (n_individuals).
# How to order in increasing order? And in decreasing order?
df %>%
  arrange(n_individuals)
df %>%
  arrange(desc(n_individuals))

# Improve previous ordering by adding date as second variable (in case same
# number of individuals occurs). Order both variables in descending order
df %>%
  arrange(desc(n_individuals), desc(date))

# Display id, species, n_individuals, weight and weight_total for observations
# with non-empty values of weight_total
df %>%
  select(id, species, n_individuals, weight, weight_total) %>%
  filter(!is.na(weight_total))
```

Leen

```r
unique(df$location_id)
unique(df$location_name)
```

Lisse

```r
#2
df %>%
  select(location_id, location_name) %>%
  distinct(location_id, location_name)

#3
df %>%
  filter(is.na(weight), is.na(weight_total))

#4
df %>%
  arrange(desc(n_individuals), desc(date))

#5
df %>%
  filter(!is.na(weight_total)) %>%
  select(id, species, n_individuals, weight, weight_total)
```

Tom

```r
df %>%
  select(id, weight_total)
View(df)
df %>%
  distinct(location_id)
df %>%
  distinct(location_name)
# Note: filter_at() is superseded in current dplyr; the equivalent is
# filter(if_all(c(weight, weight_total), is.na))
df %>%
  filter_at(vars(weight, weight_total), all_vars(is.na(.)))
df %>%
  arrange(n_individuals)
df %>%
  arrange(desc(n_individuals))
df %>%
  arrange(desc(n_individuals), desc(date))
df %>%
  filter(!is.na(weight_total)) %>%
  select(id, species, n_individuals, weight, weight_total)
```

## INTERMEZZO

## Challenge 2

Lisse

```r
#1
df_cleaned <- df %>%
  mutate(weight_total = if_else(is.na(weight_total), weight, weight_total)) %>%
  select(-weight)

#2
df_cleaned <- df_cleaned %>%
  mutate(weight = weight_total / n_individuals) %>%
  select(
    id, species, n_individuals, n_traps, starts_with("weight"), everything()
  )

#3
df_cleaned <- df_cleaned %>%
  mutate(
    location_name = case_when(
      location_name == "Zandplaat Kastel" ~ "Kastel, zandplaat",
      location_name == "Grens Steendorp/Temse" ~ "Steendorp, grens met Temse",
      TRUE ~ location_name
    )
  )
```

Hans

```r
# Set weight_total equal to weight if empty, remove column weight and save as
# new object df_cleaned
df_cleaned <- df %>%
  mutate(weight_total = ifelse(is.na(weight_total), weight, weight_total)) %>%
  select(-weight)

# Based on number of trapped individuals (n_individuals) and weight_total,
# calculate the average weight for each catch and set it in a new column of
# df_cleaned called weight. Put the columns id, species, n_individuals, n_traps
# and all ones starting with "weight" ahead
df_cleaned <- df_cleaned %>%
  mutate(weight = weight_total / n_individuals) %>%
  relocate(id, species, n_individuals, n_traps, starts_with("weight"))

# Improve location_name by applying these changes:
# original term = new term
recode_named_vector <- c(
  "Zandplaat Kastel" = "Kastel, zandplaat",
  "Grens Steendorp/Temse" = "Steendorp, grens met Temse"
)
# Save the recoded names back into df_cleaned so that later steps that group
# by location_name see the improved values, then check the result
df_cleaned <- df_cleaned %>%
  mutate(location_name = recode(location_name, !!!recode_named_vector))
df_cleaned %>%
  distinct(location_name)
```

## Challenge 3

Lisse

```r
#1
df_spec_loc <- df_cleaned %>%
  group_by(species, location_id) %>%
  summarise(
    weight_mean = mean(weight, na.rm = TRUE),
    depth_mean = mean(depth, na.rm = TRUE),
    length_mean = mean(length, na.rm = TRUE),
    min_n = min(n_individuals),
    max_n = max(n_individuals),
    tot_n = sum(n_individuals),
    first_deployment_date = min(date),
    last_deployment_date = max(date)
  ) %>%
  ungroup()
```

Joost

```r
df_mean <- df_cleaned %>%
  group_by(species, location_name) %>%
  summarise(
    across(c(weight, depth, length), ~ mean(.x, na.rm = TRUE)),
    # all three statistics must be inside the list, otherwise the sum is
    # silently never computed
    across(
      n_individuals,
      list(
        min = ~ min(.x, na.rm = TRUE),
        max = ~ max(.x, na.rm = TRUE),
        sum = ~ sum(.x, na.rm = TRUE)
      )
    ),
    across(
      date,
      list(min_deployment = ~ min(.x), max_deployment = ~ max(.x))
    ),
    # the argument is `.groups` (leading dot); `groups = "drop"` would create
    # a column called groups instead of dropping the grouping
    .groups = "drop"
  )
```

Lisse (continued)

```r
#2
df_date_loc <- df_cleaned %>%
  group_by(date, location_id) %>%
  summarise(total = n()) %>%
  ungroup() %>%
  slice_max(total, n = 10) %>%
  arrange(desc(total))

#3
df_loc <- df_cleaned %>%
  group_by(location_id) %>%
  filter(!is.na(n_individuals), n_individuals > 0) %>%
  summarise(n_success = n()) %>%
  ungroup()

#4
loc5000 <- df_loc$location_id[which(df_loc$n_success > 5000)]
df_spec_loc5000 <- df_cleaned %>%
  filter(location_id %in% loc5000) %>%
  group_by(species, location_id) %>%
  summarise(
    weight_mean = mean(weight, na.rm = TRUE),
    depth_mean = mean(depth, na.rm = TRUE),
    length_mean = mean(length, na.rm = TRUE),
    min_n = min(n_individuals),
    max_n = max(n_individuals),
    tot_n = sum(n_individuals),
    first_deployment_date = min(date),
    last_deployment_date = max(date)
  ) %>%
  ungroup()
```

Hans

```r
# For each combination of species and location, calculate the weight_mean,
# depth_mean, the length_mean, the minimum, the maximum and the total number of
# trapped individuals (min_n, max_n, tot_n), the date of the oldest and the
# most recent campaign (first_deployment_date, last_deployment_date)
species_loc_summary <- df_cleaned %>%
  group_by(species, location_name) %>%
  summarise(
    weight_mean = mean(weight, na.rm = TRUE),
    depth_mean = mean(depth, na.rm = TRUE),
    length_mean = mean(length, na.rm = TRUE),
    min_n = min(n_individuals, na.rm = TRUE),
    max_n = max(n_individuals, na.rm = TRUE),
    tot_n = sum(n_individuals, na.rm = TRUE),
    first_deployment_date = min(date, na.rm = TRUE),
    last_deployment_date = max(date, na.rm = TRUE),
    .groups = "drop"
  )

# Alternative with across(); lambdas are used because passing extra arguments
# such as na.rm through across()'s `...` is deprecated
species_loc_summary <- df_cleaned %>%
  group_by(species, location_name) %>%
  summarise(
    across(
      .cols = c(weight, depth, length),
      ~ mean(.x, na.rm = TRUE),
      .names = "{.col}_mean"
    ),
    across(
      .cols = c(n_individuals, date),
      ~ min(.x, na.rm = TRUE),
      .names = "{.col}_min"
    ),
    across(
      .cols = c(n_individuals, date),
      ~ max(.x, na.rm = TRUE),
      .names = "{.col}_max"
    ),
    tot_n = sum(n_individuals, na.rm = TRUE),
    .groups = "drop"
  )

# How many measurement campaigns for each date-location? Select the top 10
df_cleaned %>%
  count(date, location_name, name = "measurement_campaigns") %>%
  slice_max(measurement_campaigns, n = 10)

# For each location, calculate number of successful catches, n_success,
# (= n_individuals > 0 or species not NA) and number of species, n_species
location_summary <- df_cleaned %>%
  group_by(location_name) %>%
  summarise(
    n_success = sum(n_individuals > 0, na.rm = TRUE),
    n_species = n_distinct(species)
  )

# Calculate the statistics in 1 only for locations with more than 5000
# successful catches
species_loc_summary %>%
  semi_join(
    location_summary %>%
      filter(n_success > 5000),
    by = "location_name"
  )
```