# INBO CODING CLUB
29 April 2026
Welcome!
## Share your code snippet
If you want to share a code snippet, paste it inside a block fenced by three backticks (```):
As an **example**:
```
library(tidyverse)
```
(*you can copy paste this example and add your code further down*)
## Yellow sticky notes
No yellow sticky notes online. Add your name followed by " | " to the table below, then add a "*" each time you solve a challenge.
## Participants
Name | Challenges
--- | ---
Damiano Oldoni | ***
Falk Mielke |
Mieke Verbeeck |
Adriaan Seynaeve |
Kaat Thienpont |
Droomelot De Gendt |
Arc'hantael Labrière |
Lawrence Whatley |
## Challenge 1
### Damiano's solution (example)
Copy paste this section to show your solutions.
```r
# dummy code
print("This is how to insert code.")
```
### Falk's trials
````markdown
## [1.1] Extract species
```{r use-distinct}
# Unique species names as a plain vector, in order of first appearance.
species_vec <- cube %>%
  distinct(species) %>%
  pull(species)

# One row per (specieskey, species) combination found in the cube.
species_df <- distinct(cube, specieskey, species)
```
## [1.2] Filter "meaningful" occurrences
```{r filter-occurrences}
# Keep rows with more than one occurrence AND a minimum coordinate
# uncertainty below 1000.
cube_occurrences_gt_1_precise <- filter(
  cube,
  occurrences > 1 & mincoordinateuncertaintyinmeters < 1000
)

# Quick structural overview of the filtered cube.
glimpse(cube_occurrences_gt_1_precise)
```
## [1.3] are all species present in the cube? Of course!
Use [filtering joins](https://dplyr.tidyverse.org/reference/filter-joins.html)!
(naïve alternative: filter with `%in%`)
```{r filtering-joins}
# Species that occur in the filtered cube but have no matching
# (specieskey, species) row in `species_df`.
missing_species <- cube_occurrences_gt_1_precise %>%
  anti_join(
    species_df,
    by = join_by(specieskey, species)
  ) %>%
  distinct(species)

# Only report when there is something to report. The row count is compared
# explicitly instead of relying on integer truthiness.
if (nrow(missing_species) > 0) {
  message("missing species:")
  missing_species %>%
    knitr::kable()
}
```
```{r inverse-filtering-joins}
# Reverse check: (specieskey, species) pairs listed in `species_df` that have
# no matching row in the filtered cube.
missing_species <- anti_join(
  species_df,
  cube_occurrences_gt_1_precise,
  by = join_by(specieskey, species)
)
missing_species <- distinct(missing_species, specieskey, species)

knitr::kable(missing_species)
```
## [1.4] minimal information cube
Learning to `select` only some data for an imaginary context.
Because the combination of `c(eeacellcode, year, specieskey)` should be unique per cube,
we could just use `select(eeacellcode, year, specieskey, occurrences)`.
However, more generally:
```{r extract-minimal-cube}
# Total number of occurrences per grid cell, year and species key.
cube_minimal <- cube %>%
  summarize(
    occurrences = sum(occurrences),
    .by = c(eeacellcode, year, specieskey)
  )

# Show three random rows, transposed so that the columns read top to bottom.
# `slice_sample()` replaces the superseded `sample_n()`.
cube_minimal %>%
  slice_sample(n = 3) %>%
  t() %>%
  knitr::kable()
```
## [1.5] relocate columns
*cf.* [`dplyr::relocate`](https://dplyr.tidyverse.org/reference/relocate.html)
```{r relocate-columns}
# The assignment pipe `%<>%` comes from magrittr. `library()` already errors
# when the package is missing, so no `stopifnot(require())` wrapper is needed.
library(magrittr)

# Move the identifying columns to the front ...
cube %<>%
  relocate(specieskey, species, year, eeacellcode)

# ... and the occurrence count and uncertainty columns to the end.
cube %<>%
  relocate(
    occurrences, mincoordinateuncertaintyinmeters, mintemporaluncertainty,
    .after = last_col()
  )

# Show three random rows, transposed. `slice_sample()` replaces the
# superseded `sample_n()`.
cube %>%
  slice_sample(n = 3) %>%
  t() %>%
  knitr::kable()
```
````
### Adriaan's try:
```
# 1.1
# Unique species names, as a vector and as a one-column data frame whose
# column is called `specieskey`.
species <- unique(cube$species)
species_df <- data.frame(specieskey = species)

# 1.2
# Rows with more than one occurrence and a coordinate uncertainty below 1000.
cube_occurrences_gt_1_precise <- cube %>%
  filter(
    occurrences > 1,
    mincoordinateuncertaintyinmeters < 1000
  )

# 1.3
# For each species left after filtering: is it among the species of the
# full cube?
species_gt1prec <- unique(cube_occurrences_gt_1_precise$species)
species_gt1prec %in% species

# 1.4
cube_minimal <- select(cube, eeacellcode, specieskey, year)

# 1.5
# Put the listed columns first, in this order; all remaining columns keep
# their original relative order behind them.
cube <- cube %>%
  relocate(
    specieskey, species, year,
    occurrences, mincoordinateuncertaintyinmeters, mintemporaluncertainty
  )
```
```r
# 2A
# 2.1
# Shorter names for the cell code and the two uncertainty columns.
cube <- cube %>%
  rename(
    cell_code = eeacellcode,
    min_coord_unc = mincoordinateuncertaintyinmeters,
    min_temp_unc = mintemporaluncertainty
  )

# 2.2
# Number of rows per species, largest first, then by species name.
Number_of_rows_per_species <- cube %>%
  count(species, specieskey) %>%
  arrange(desc(n), species)

# 2.3
# Distinct coordinate uncertainty values per species, largest first.
precision <- cube %>%
  distinct(species, min_coord_unc) %>%
  arrange(species, desc(min_coord_unc)) %>%
  rename(coord_unc_values = min_coord_unc)

# 2.4
# Number of distinct species keys attached to each species name ...
species_name_key_prep <- cube %>%
  distinct(species, specieskey) %>%
  count(species, name = "n_species")

# ... joined back so that every (species, specieskey) pair carries that count.
species_name_key <- cube %>%
  left_join(species_name_key_prep, by = "species") %>%
  select(species, specieskey, n_species) %>%
  distinct() %>%
  arrange(desc(n_species), species)

# 2B
# 2.1
# Species richness: number of distinct species keys per year.
species_richness_year <- cube %>%
  distinct(year, specieskey) %>%
  count(year, name = "n_species")

# 2.2
# Bins: low = up to 50 species, medium = 51 to 60, high = more than 60.
# The medium bin starts right above 50, so a year with exactly 51 species is
# matched instead of tripping the `.unmatched = "error"` check.
species_richness_year <- species_richness_year %>%
  mutate(
    category = case_when(
      n_species <= 50 ~ "low",
      n_species > 50 & n_species <= 60 ~ "medium",
      n_species > 60 ~ "high",
      .unmatched = "error"
    )
  )

# 2.3
# Total occurrences and number of rows per species key and year.
# NOTE(review): `n()` counts rows; it equals the number of occupied cells only
# if each cell appears once per species key and year. Confirm for this cube.
# `.groups = "drop"` returns an ungrouped result.
timeseries <- cube %>%
  group_by(specieskey, year) %>%
  summarise(
    n_occurrences = sum(occurrences),
    n_cells = n(),
    .groups = "drop"
  ) %>%
  arrange(specieskey, desc(year))
```
### Droomelot's test
```
# 1.1
# Unique species names as a vector, and as a one-column data frame whose
# column is called `specieskey`.
species_vector <- unique(pull(cube, species))
species_df <- cube |>
  distinct(species) |>
  rename(specieskey = species)

# 1.2
# Rows with more than one occurrence and a coordinate uncertainty below 1000.
cube_occurrences_gt_1_precise <- cube |>
  filter(
    occurrences > 1,
    mincoordinateuncertaintyinmeters < 1000
  )

# 1.3
# Rows of the filtered cube whose species is not in `species_vector`.
cube_occurrences_gt_1_precise |>
  filter(!(species %in% species_vector))

# 1.4
cube_minimal <- select(cube, eeacellcode, year, specieskey, occurrences)

# 1.5
# Listed columns go first, in this order; the others follow unchanged.
cube <- cube |>
  relocate(
    specieskey, species, year, eeacellcode,
    occurrences, mincoordinateuncertaintyinmeters, mintemporaluncertainty
  )
```
## Challenge 2
### Falk's attempt
````markdown
## [2.1] rename columns
```{r renaming}
# Shorter names for the cell code and the two uncertainty columns; the cube
# is overwritten with the renamed version.
cube <- cube %>%
  rename(
    min_temp_unc = mintemporaluncertainty,
    min_coord_unc = mincoordinateuncertaintyinmeters,
    cell_code = eeacellcode
  )
```
## [2.2] count and arrange
```{r counting-rows}
# Number of rows per species, largest count first (ties ordered by species
# name); print the first eight as a table.
cube %>%
  count(species) %>%
  arrange(desc(n), species) %>%
  slice_head(n = 8) %>%
  knitr::kable()
```
## [2.3] uncertainty per species
nesting?
```{r uncertainty-per-species}
# One row per species, with its `min_coord_unc` values collected in a
# list-column named `data`.
cube %>%
  select(species, min_coord_unc) %>%
  nest(.by = species, data = min_coord_unc)
```
## [2.4] tally!
*cf.* [`add_tally`](https://dplyr.tidyverse.org/reference/count.html)
(Thanks, Damiano!)
```{r species-combinations}
# Every distinct (specieskey, species) pair, annotated with `n`: the number of
# such pairs sharing the same species name. Highest `n` first.
species_name_key <- cube %>%
  distinct(specieskey, species) %>%
  mutate(n = n(), .by = species) %>%
  arrange(desc(n), species)

# Species names that are linked to more than one species key.
species_name_key %>%
  filter(n > 1) %>%
  knitr::kable()
```
## [2.5 = 2B.1] species richness
```{r species-richness}
# # original intention:
# species_richness_year <- cube_occurrences_gt_1_precise %>%
#   distinct(year, species) %>%
#   count(year) %>%
#   rename(species_richness = n) %>%
#   arrange(desc(species_richness), year)

# Number of distinct species per year and grid cell, richest first.
species_richness_year <- cube %>%
  distinct(year, species, cell_code) %>%
  count(year, cell_code, name = "species_richness") %>%
  arrange(desc(species_richness), year)
```
## [2.5 = 2B.2] conditional recoding values
<https://dplyr.tidyverse.org/articles/recoding-replacing.html>
```{r case-when}
# Bin the richness values: up to 10 is "low", 11 to 20 is "medium", above 20
# is "high". Any value matching none of the conditions raises an error.
species_richness_year %>%
  mutate(
    richness_category = case_when(
      species_richness > 20 ~ "high",
      species_richness > 10 ~ "medium",
      species_richness <= 10 ~ "low",
      # .default = NA,
      .unmatched = "error"
    )
  ) %>%
  arrange(year)
```
## [2.6 = 2B.3] time series
Calculate the measured occupancy, i.e. the number of occupied grid cells, and the total number of occurrences per species and year.
```{r summarize-time-series}
# Per year and species: number of distinct occupied cells and total number of
# occurrences, ordered by species key and year.
timeseries_cube <- cube %>%
  summarise(
    .by = c(year, species, specieskey),
    n_cells = n_distinct(cell_code),
    n_occurrences = sum(occurrences)
  ) %>%
  arrange(specieskey, year)
```
````
### Droomelot's test
```
# 2A.1
# Shorter names for the cell code and the two uncertainty columns.
cube <- cube |>
  rename(
    cell_code = eeacellcode,
    min_coord_unc = mincoordinateuncertaintyinmeters,
    min_temp_unc = mintemporaluncertainty
  )

# 2A.2
# Number of rows per species, largest first, then by species name.
cube_count_species <- cube |>
  count(species, specieskey) |>
  arrange(desc(n), species)

# 2A.3
# Distinct coordinate uncertainty values per species, smallest first.
cube_disintct_coord <- cube |>
  distinct(species, min_coord_unc) |>
  arrange(species, min_coord_unc)

# 2A.4
# Number of distinct species keys per species name, attached to every
# (specieskey, species) pair.
species_name_key <- cube |>
  mutate(
    n_species = n_distinct(specieskey),
    .by = species
  ) |>
  distinct(specieskey, species, n_species) |>
  arrange(desc(n_species), species)

# 2B.1
# Species richness: number of distinct species keys per year.
species_richness_year <- cube |>
  summarise(
    n_species = n_distinct(specieskey),
    .by = year
  ) |>
  arrange(desc(n_species), year)

# 2B.2
# Bins: low = up to 50 species, medium = 51 to 60, high = more than 60.
species_richness_year <- species_richness_year |>
  mutate(
    richness_category = case_when(
      n_species <= 50 ~ "low",
      n_species <= 60 ~ "medium",
      n_species > 60 ~ "high",
      .unmatched = "error"
    )
  )

# 2B.3
# Occupied cells and total occurrences per year and species key.
# `.by` (as in 2B.1) returns an ungrouped tibble, whereas `group_by()` +
# `summarise()` would leave the result grouped by `year`.
# NOTE(review): the column name `n_occurences` (single "r") is kept as written.
cube_timeseries <- cube |>
  summarise(
    n_cell_code = n_distinct(cell_code),
    n_occurences = sum(occurrences),
    .by = c(year, specieskey)
  ) |>
  arrange(specieskey, year)
```
## Challenge 3