owned this note
owned this note
Published
Linked with GitHub
# INBO CODING CLUB
<!---
Set date as DD month yyyy, e.g.
27 October 2020
-->
29 April 2021
Welcome!
## Share your code snippet
If you want to share your code snippet, copy and paste it between two lines of three backticks (```):
As an **example**:
```
library(tidyverse)
```
(*you can copy paste this example and add your code further down*)
## Progress
Put your name + " | " and add a "*" each time you solve a challenge (see below).
## Participants
Name | Group | Challenges
--- | --- | ---
Damiano Oldoni | |
Hans Van Calster |1|*
Patrik Oosterlynck | |
Joost Vanoverbeke |4|**
Suzanna Lettens |1|**
Lisse Goris |4|***
Amber Mertens |2|**
Loïc van Doorn |2|
Dirk Maes |1| **
Lucia Manzanares | |
Tom De Dobbelaer |3 |**
Leen Govaere |3|*
Raïsa Carmen |2|**
Jasmijn Hillaert |1|*
Adriaan Seynaeve | |*
Vincent Smeekens | |
Anja Leyman |1|***
Lynn Pallemaerts | 4 | **
## Challenge 1
Hans
````
library(tidyverse)

# Read the aquatic trap data into `df`
df <- read_csv("./data/20210429/20210429_aquatictrap_data.csv")

# Columns id and weight_total only
select(df, id, weight_total)

# Distinct locations: id/name pairs, then ids alone, then names alone
distinct(df, location_id, location_name)
distinct(df, location_id)
distinct(df, location_name)

# Rows where weight and weight_total are both missing
filter(df, is.na(weight) & is.na(weight_total))

# Sort by number of trapped individuals: ascending first, then descending
arrange(df, n_individuals)
arrange(df, desc(n_individuals))

# Same descending sort, with date (also descending) as the tie-breaker
arrange(df, desc(n_individuals), desc(date))

# id, species, n_individuals, weight and weight_total for observations that
# have a weight_total (weight_total is among the kept columns, so filtering
# before or after the selection gives the same rows)
df %>%
  filter(!is.na(weight_total)) %>%
  select(id, species, n_individuals, weight, weight_total)
````
Leen
```
# Unique location ids and location names, returned as plain vectors
unique(df[["location_id"]])
unique(df[["location_name"]])
```
Lisse
```
# 2: distinct location_id / location_name pairs
df %>%
  select(location_id, location_name) %>%
  distinct()

# 3: rows where weight and weight_total are both missing
df %>%
  filter(is.na(weight) & is.na(weight_total))

# 4: sort by n_individuals, then by date, both descending
df %>%
  arrange(desc(n_individuals), desc(date))

# 5: selected columns for the rows that have a weight_total
df %>%
  select(id, species, n_individuals, weight, weight_total) %>%
  filter(!is.na(weight_total))
```
Tom
```
# Show only the id and weight_total columns
df %>%
  select(id, weight_total)

# Open the data viewer; guarded because View() is only useful interactively
if (interactive()) {
  View(df)
}

# Distinct location ids, then distinct location names
df %>%
  distinct(location_id)
df %>%
  distinct(location_name)

# Rows where weight and weight_total are both NA
# (if_all() replaces the superseded filter_at() / all_vars() pair)
df %>%
  filter(if_all(c(weight, weight_total), is.na))

# Sort by number of trapped individuals: ascending, then descending
df %>%
  arrange(n_individuals)
df %>%
  arrange(desc(n_individuals))

# Descending by n_individuals, with descending date as the tie-breaker
df %>%
  arrange(desc(n_individuals), desc(date))

# Selected columns for observations that have a weight_total
df %>%
  filter(!is.na(weight_total)) %>%
  select(id, species, n_individuals, weight, weight_total)
```
## INTERMEZZO
## Challenge 2
Lisse
```
# 1: fill weight_total from weight where it is missing, then drop weight
df_cleaned <- df %>%
  mutate(weight_total = coalesce(weight_total, weight)) %>%
  select(-weight)

# 2: average weight per catch, stored in a new column `weight`; move id,
#    species, n_individuals, n_traps and the weight* columns to the front
df_cleaned <- df_cleaned %>%
  mutate(weight = weight_total / n_individuals) %>%
  select(
    id, species, n_individuals, n_traps,
    starts_with("weight"),
    everything()
  )

# 3: rename two locations; every other name is kept unchanged
#    (TRUE instead of T: T is an ordinary variable that can be reassigned)
df_cleaned <- df_cleaned %>%
  mutate(
    location_name = case_when(
      location_name == "Zandplaat Kastel" ~ "Kastel, zandplaat",
      location_name == "Grens Steendorp/Temse" ~ "Steendorp, grens met Temse",
      TRUE ~ location_name
    )
  )
```
Hans
```
# Set weight_total equal to weight if empty, remove column weight and save as
# new object df_cleaned
df_cleaned <- df %>%
  mutate(weight_total = coalesce(weight_total, weight)) %>%
  select(-weight)

# Based on number of trapped individuals (n_individuals) and weight_total,
# calculate the average weight for each catch and set it in a new column of
# df_cleaned called weight. Put the columns id, species, n_individuals,
# n_traps and all ones starting with "weight" ahead
df_cleaned <- df_cleaned %>%
  mutate(weight = weight_total / n_individuals) %>%
  relocate(id, species, n_individuals, n_traps, starts_with("weight"))

# Improve location_name by applying these changes:
# original term = new term
recode_named_vector <- c(
  "Zandplaat Kastel" = "Kastel, zandplaat",
  "Grens Steendorp/Temse" = "Steendorp, grens met Temse"
)

# Save the recoded names back into df_cleaned so that later steps use them
df_cleaned <- df_cleaned %>%
  mutate(location_name = recode(location_name, !!!recode_named_vector))

# Check the resulting location names
df_cleaned %>%
  distinct(location_name)
```
## Challenge 3
Lisse
```
# 1: per species and location: mean weight, depth and length; minimum,
#    maximum and total number of trapped individuals; first and last
#    deployment date. Missing values are ignored in every statistic.
df_spec_loc <- df_cleaned %>%
  group_by(species, location_id) %>%
  summarise(
    weight_mean = mean(weight, na.rm = TRUE),
    depth_mean = mean(depth, na.rm = TRUE),
    length_mean = mean(length, na.rm = TRUE),
    min_n = min(n_individuals, na.rm = TRUE),
    max_n = max(n_individuals, na.rm = TRUE),
    tot_n = sum(n_individuals, na.rm = TRUE),
    first_deployment_date = min(date, na.rm = TRUE),
    last_deployment_date = max(date, na.rm = TRUE),
    .groups = "drop"
  )
# Joost
# Per species and location: means of weight, depth and length; minimum,
# maximum and sum of n_individuals; earliest and latest date.
df_mean <-
  df_cleaned %>%
  group_by(species, location_name) %>%
  summarise(
    across(c(weight, depth, length), ~ mean(.x, na.rm = TRUE)),
    # sum must be an element of the list to become a third summary column
    across(
      n_individuals,
      list(
        min = ~ min(.x, na.rm = TRUE),
        max = ~ max(.x, na.rm = TRUE),
        sum = ~ sum(.x, na.rm = TRUE)
      )
    ),
    across(
      date,
      list(min_deployment = ~ min(.x), max_deployment = ~ max(.x))
    ),
    # .groups (with the leading dot) is the summarise() argument; a plain
    # `groups = "drop"` would create a column called `groups` instead
    .groups = "drop"
  )
# 2: number of rows per date and location; keep the 10 highest counts
#    (ties included), largest first
df_date_loc <- df_cleaned %>%
  count(date, location_id, name = "total") %>%
  slice_max(total, n = 10) %>%
  arrange(desc(total))

# 3: per location, number of rows with at least one trapped individual
#    (filter() already drops rows where n_individuals is NA)
df_loc <- df_cleaned %>%
  filter(n_individuals > 0) %>%
  count(location_id, name = "n_succes")

# 4: same statistics as in 1, restricted to locations with more than 5000
#    successful catches
loc5000 <- df_loc %>%
  filter(n_succes > 5000) %>%
  pull(location_id)

df_spec_loc5000 <- df_cleaned %>%
  filter(location_id %in% loc5000) %>%
  group_by(species, location_id) %>%
  summarise(
    weight_mean = mean(weight, na.rm = TRUE),
    depth_mean = mean(depth, na.rm = TRUE),
    length_mean = mean(length, na.rm = TRUE),
    min_n = min(n_individuals, na.rm = TRUE),
    max_n = max(n_individuals, na.rm = TRUE),
    tot_n = sum(n_individuals, na.rm = TRUE),
    first_deployment_date = min(date, na.rm = TRUE),
    last_deployment_date = max(date, na.rm = TRUE),
    .groups = "drop"
  )
```
Hans
```
# For each combination of species and location, calculate the weight_mean,
# depth_mean, the length_mean, the minimum, the maximum and the total number
# of trapped individuals (min_n, max_n, tot_n), the date of the oldest and the
# most recent campaign (first_deployment_date, last_deployment_date)
species_loc_summary <- df_cleaned %>%
  group_by(species, location_name) %>%
  summarise(
    weight_mean = mean(weight, na.rm = TRUE),
    depth_mean = mean(depth, na.rm = TRUE),
    length_mean = mean(length, na.rm = TRUE),
    min_n = min(n_individuals, na.rm = TRUE),
    max_n = max(n_individuals, na.rm = TRUE),
    tot_n = sum(n_individuals, na.rm = TRUE),
    first_deployment_date = min(date, na.rm = TRUE),
    last_deployment_date = max(date, na.rm = TRUE),
    .groups = "drop"
  )

# Alternative with across(). NOTE: this overwrites species_loc_summary, and
# the min/max columns are named after the source column ("{.col}_min",
# "{.col}_max") instead of min_n / first_deployment_date etc.
# Lambdas are used because passing extra arguments through across(...) is
# deprecated.
species_loc_summary <- df_cleaned %>%
  group_by(species, location_name) %>%
  summarise(
    across(
      .cols = c(weight, depth, length),
      .fns = ~ mean(.x, na.rm = TRUE),
      .names = "{.col}_mean"
    ),
    across(
      .cols = c(n_individuals, date),
      .fns = ~ min(.x, na.rm = TRUE),
      .names = "{.col}_min"
    ),
    across(
      .cols = c(n_individuals, date),
      .fns = ~ max(.x, na.rm = TRUE),
      .names = "{.col}_max"
    ),
    tot_n = sum(n_individuals, na.rm = TRUE),
    .groups = "drop"
  )

# How many measurement campaigns for each date-location? Select the top 10
# (ranking column named explicitly; ties are kept)
df_cleaned %>%
  count(date, location_name, name = "measurement_campaigns") %>%
  slice_max(measurement_campaigns, n = 10)

# For each location, calculate number of successful catches, n_succes,
# (= n_individuals > 0 or species not NA) and number of species, n_species.
# na.rm = TRUE keeps a missing species from being counted as a species.
location_summary <- df_cleaned %>%
  group_by(location_name) %>%
  summarise(
    n_succes = sum(n_individuals > 0, na.rm = TRUE),
    n_species = n_distinct(species, na.rm = TRUE)
  )

# Calculate the statistics in 1 only for locations with more than 5000
# successful catches
species_loc_summary %>%
  semi_join(
    location_summary %>%
      filter(n_succes > 5000),
    by = "location_name"
  )
```