INBO CODING CLUB

March 25th 2025

Welcome!

Share your code snippet

If you want to share a code snippet, paste it inside a block delimited by a line of three backticks (```) above and below it:

As an example:

library(tidyverse)

(you can copy paste this example and add your code further down)

Yellow sticky notes

No yellow sticky notes online. Put your name + " | " and add a "*" each time you solve a challenge (see below).

Participants

Name Challenges
Raïsa Carmen ***
Jorre
Pieter Huybrechts
Falk Mielke ** +
Dirk Maes ***
Kaat Thienpont
Margot Vermeylen *
Isaac Vermeulen

general

Install colorblindr:

# Install {remotes} from the default repository; it provides install_github().
install.packages("remotes")
# Install {colorspace} from R-Forge rather than from the default repository.
install.packages("colorspace", repos = "http://R-Forge.R-project.org")
# Install {colorblindr} from its GitHub repository (clauswilke/colorblindr).
remotes::install_github("clauswilke/colorblindr")

(source)

Challenge 1

Raïsa's solution (dummy example)

Copy paste this section to show your solutions.

# Dummy example: replace these lines with your own solution.
print("This is how to insert code.")

Jorre

# Scatter plot of yearly occurrence counts, one colour per country, on a
# log10 y-axis with the default smoother drawn over the points.
ggplot(
  occs_benelux_animals,
  aes(x = year, y = count, group = country, colour = country)
) +
  geom_point() +
  labs(
    title = "GBIF occurrence records in the Benelux",
    x = "year",
    y = "number of occurrences"
  ) +
  scale_y_log10() +
  geom_smooth()

# Same data as bars: one dodged bar per country and year, log10 y-axis.
ggplot(
  occs_benelux_animals,
  aes(x = year, y = count, group = country, fill = country)
) +
  geom_col(position = "dodge") +
  labs(
    title = "GBIF occurrence records in the Benelux",
    x = "year",
    y = "number of occurrences"
  ) +
  scale_y_log10()

Put titles in separate function for reuse:

# Shared decorations for the Benelux plots: title, axis labels and a log10
# y-axis. They are returned as a list so the whole set can be added to a
# ggplot with a single `+ titles()`.
titles <- function() {
  plot_title <- ggtitle("GBIF occurrence records in the Benelux")
  x_label <- xlab("year")
  y_label <- ylab("number of occurrences")
  log_y_axis <- scale_y_continuous(trans = "log10")
  list(plot_title, x_label, y_label, log_y_axis)
}

# Scatter plot with smoother, decorated through titles().
ggplot(
  occs_benelux_animals,
  aes(x = year, y = count, group = country, colour = country)
) +
  geom_point() +
  geom_smooth() +
  titles()

# Dodged bar plot, decorated through titles().
ggplot(
  occs_benelux_animals,
  aes(x = year, y = count, group = country, fill = country)
) +
  geom_col(position = "dodge") +
  titles()

Falk's attempt

# Scatter plot of yearly occurrence counts per country with a loess smoother
# (confidence band shown) on a log10 y-axis.
occs_benelux_animals %>%
  # Convert the categorical columns to factors. across() is the current
  # replacement for the superseded mutate_at().
  mutate(across(c(kingdom, country), factor)) %>%
  ggplot(aes(x = year, y = count, color = country)) +
  geom_point(shape = 4) +
  geom_smooth(formula = "y ~ x", method = "loess", se = TRUE) +
  # geom_bar(aes(fill = country), stat = "identity", position = "dodge") +
  labs(title = "GBIF occurrence records in the Benelux") +
  xlab("year") +
  ylab("number of occurrences") +
  scale_y_log10() +
  theme_minimal()

Hanna

# p1: scatter plot coloured by country on a log10 y-axis, with a loess
# smoother per country and its confidence interval.
p1 <- occs_benelux_animals |>
  ggplot(aes(x = year, y = count)) +
  geom_point(aes(colour = country)) +
  ggtitle("GBIF occurrences records in the Benelux") +
  xlab("year") +
  ylab("number of occurrences") +
  scale_y_log10() +
  geom_smooth(aes(colour = country), method = "loess", se = TRUE)


# p2: the same counts as dodged bars filled by country, log10 y-axis.
p2 <- occs_benelux_animals |>
  ggplot(aes(x = year, y = count, fill = country)) +
  geom_col(position = position_dodge()) +
  ggtitle("GBIF occurrences records in the Benelux") +
  xlab("year") +
  ylab("number of occurrences") +
  scale_y_log10()

Pieter


# Make a scatter plot (= points) with the number of occurrences (y) per year
# (x). Distinguish the countries by color.

# Add a title (e.g. "GBIF occurrence records in the Benelux") and labels for
# the axes (e.g. "year" and "number of occurrences").

# To better represent the data, use a logarithmic scale for the y-axis.

# Add a smoother to the plot. Use the "loess" method and color the smoother by
# country. Show the confidence interval (standard error).

ggplot(
  occs_benelux_animals,
  aes(x = year, y = count, colour = country)
) +
  labs(
    title = "GBIF occurrence records in the Benelux",
    x = "year",
    y = "number of occurrences"
  ) +
  scale_y_log10() +
  # The smoother is added before the points, so the points are drawn on top.
  geom_smooth(method = "loess", se = TRUE, level = 0.95) +
  geom_point()

# This kind of data could also be represented by a bar plot. Try to do it
# (without smoother). The grammar of graphics makes this kind of change quite
# easy, doesn't it? What do you think? Which one is more informative: the
# scatter plot or the bar plot? And why?

# Variant 1: geom_col() plots the y values as given.
ggplot(
  occs_benelux_animals,
  aes(x = year, y = count, fill = country)
) +
  geom_col()

# Variant 2: the same plot spelled as geom_bar() with stat = "identity".
ggplot(
  occs_benelux_animals,
  aes(x = year, y = count, fill = country)
) +
  geom_bar(stat = "identity")

Dirk's attempt

# Scatter plot: yearly counts coloured by country, log10 y-axis, loess
# smoother with confidence band.
p <- occs_benelux_animals |>
  ggplot(aes(x = year, y = count, colour = country)) +
  geom_point(size = 2) +
  labs(
    title = "GBIF occurrence records in the Benelux",
    x = "year",
    y = "number of occurrences"
  ) +
  scale_y_log10() +
  geom_smooth(method = loess, se = TRUE)
p

# Bar plot: one panel per country, each with its own free axis scales.
p <- occs_benelux_animals |>
  ggplot(aes(x = year, y = count, fill = country)) +
  geom_col(position = "dodge") +
  labs(
    title = "GBIF occurrence records in the Benelux",
    x = "year",
    y = "number of occurrences"
  ) +
  facet_wrap(vars(country), scales = "free")
p

Challenge 2

Falk's suggestion

# Manual colour mapping per country code (hex codes with the matching R
# colour names). Only used by the commented-out scale_color_manual() below.
country_colors <- c(
  "BE" = "#111111", # "black",
  "NL" = "#87CEEB", # "skyblue",
  "LU" = "#CD5C5C", # "indian red"
  "default" = "#FFFFFF"
)


# Scatter plot with loess smoother per country on a log10 y-axis, coloured
# with a paletteer palette. Alternative colour scales are kept as comments.
g <- occs_benelux_animals %>%
  # across() is the current replacement for the superseded mutate_at().
  mutate(across(c(kingdom, country), factor)) %>%
  ggplot(aes(x = year, y = count, group = country, color = country)) +
  geom_point(shape = 4) +
  geom_smooth(formula = "y ~ x", method = "loess", se = TRUE) +
  # geom_bar(aes(fill = country), stat = "identity") +
  labs(title = "GBIF occurrence records in the Benelux") +
  xlab("year") +
  ylab("number of occurrences") +
  scale_y_log10() +
  # scale_color_viridis_d() +
  # scale_color_manual(values = country_colors) +
  # scale_colour_paletteer_d("nbapalettes::supersonics_holiday") +
  scale_colour_paletteer_d("rockthemes::facelift") +
  theme_minimal()

# Simulate how the plot looks under different colour-vision deficiencies.
colorblindr::cvd_grid(g)

Lawrence

# Base plot shared by all colour variants below: points plus loess smoother
# per country on a log10 y-axis. Building it once avoids repeating the same
# five layers for every colour scale.
p_base <- ggplot(
  data = occs_benelux_animals,
  mapping = aes(x = year, y = count, colour = country)
) +
  geom_point() +
  labs(
    x = "Year",
    y = "Number of occurrences",
    title = "GBIF occurrence records in the Benelux"
  ) +
  scale_y_log10() +
  geom_smooth(formula = "y ~ x", method = "loess", se = TRUE)

# Variant 1: manual colours by R colour name.
p <- p_base +
  scale_colour_manual(
    values = c("black", "skyblue", "indianred"),
    limits = c("BE", "NL", "LU")
  )
p

# Variant 2: the same colours given as hex codes.
p <- p_base +
  scale_colour_manual(
    values = c("#000000", "#87ceeb", "#cd5c5c"),
    limits = c("BE", "NL", "LU")
  )
p

# Variant 3: discrete viridis palette.
p <- p_base +
  scale_colour_viridis_d()
p

# Variant 4: a palette picked through {paletteer}.
p <- p_base +
  paletteer::scale_colour_paletteer_d("beyonce::X39")
p

# Colour-blindness simulation; `p` holds the paletteer variant at this point.
cvd_grid(p)

Isaac

# Plot 1: manual colours by R colour name. `limits` fixes the legend order and
# pairs each country code with the colour at the same position in `values`.
occs_benelux_animals |>
  ggplot(aes(x = year, y = count, group = country, color = country)) +
  geom_point() +
  geom_smooth() +
  ggtitle("GBIF occurrence records in the Benelux") +
  xlab("Year") +
  ylab("Number of occurrences") +
  scale_y_continuous(trans = "log10") +
  scale_color_manual(
    values = c("black", "skyblue", "indianred"),
    limits = c("BE", "NL", "LU"),
    name = "Country",
    labels = c("BE", "NL", "LU")
  )


# Plot 2: the same colours as hex codes. Hex codes must be quoted strings:
# an unquoted `#` starts a comment and swallows the rest of the line. They
# also need six hex digits: black is "#000000", skyblue is "#87ceeb" and
# indianred is "#cd5c5c".
occs_benelux_animals |>
  ggplot(aes(x = year, y = count, group = country, color = country)) +
  geom_point() +
  geom_smooth() +
  ggtitle("GBIF occurrence records in the Benelux") +
  xlab("Year") +
  ylab("Number of occurrences") +
  scale_y_continuous(trans = "log10") +
  scale_color_manual(
    values = c("#000000", "#87ceeb", "#cd5c5c"),
    limits = c("BE", "NL", "LU"),
    name = "Country",
    labels = c("BE", "NL", "LU")
  )


# Plot 3: viridis "plasma" palette. `limits` is used instead of `labels` here:
# labels are matched to breaks by position, and without limits the breaks are
# in sorted order (BE, LU, NL), so labels = c("BE", "NL", "LU") would swap the
# legend entries for LU and NL.
occs_benelux_animals |>
  ggplot(aes(x = year, y = count, group = country, color = country)) +
  geom_point() +
  geom_smooth() +
  ggtitle("GBIF occurrence records in the Benelux") +
  xlab("Year") +
  ylab("Number of occurrences") +
  scale_y_continuous(trans = "log10") +
  scale_color_viridis_d(
    option = "plasma",
    name = "Country",
    limits = c("BE", "NL", "LU")
  )
                     

Pieter

# Set colors manually. Use "black" for Belgium, "skyblue" for The Netherlands
# and "indian red" for Luxembourg.

# The named vector ties each colour to a country code, so the mapping does not
# depend on the order of the values. Luxembourg uses "indianred" as the task
# asks.
occs_benelux_animals %>%
  ggplot(mapping = aes(
    x = year,
    y = count,
    colour = country
  )) +
  scale_y_log10() +
  scale_colour_manual(
    values = c(BE = "black",
               NL = "skyblue",
               LU = "indianred")
  ) +
  geom_point()

# Create same plot using the hexcodes.

# Convert colour specifications accepted by col2rgb() (e.g. R colour names)
# to "#RRGGBB" hex strings; vectorised over `colour`.
# col2rgb() returns channel values in 0-255, so the maximum handed to rgb()
# must be 255. With 256 every channel is scaled down slightly and "white"
# comes out as "#FEFEFE" instead of "#FFFFFF".
col_to_hex <- function(colour) {
  rgb(t(col2rgb(colour)), maxColorValue = 255)
}

# Manual colours given as hex codes, derived from colour names with
# col_to_hex().
ggplot(
  occs_benelux_animals,
  aes(x = year, y = count, colour = country)
) +
  scale_y_log10() +
  scale_colour_manual(
    values = c(
      BE = col_to_hex("grey10"),
      NL = col_to_hex("steelblue"),
      LU = col_to_hex("indianred2")
    )
  ) +
  geom_point()

# Let's set the color automatically by using the famous viridis palette.
ggplot(
  occs_benelux_animals,
  aes(x = year, y = count, colour = country)
) +
  scale_colour_viridis_d(option = "cividis") +
  theme_grey() +
  scale_y_log10() +
  geom_point()

# Use the R Graph Gallery's color palette finder to explore palettes
# interactively. Pick a palette and use it in your plot

# Install {wesanderson} only when it is not already available.
if (system.file(package = "wesanderson") == "") {
  install.packages("wesanderson")
}

gbh_plot <- ggplot(
  occs_benelux_animals,
  aes(x = year, y = count, colour = country)
) +
  scale_colour_paletteer_d("wesanderson::GrandBudapest1") +
  scale_y_log10() +
  geom_point()

gbh_plot

# Use the {colorblindr} package to simulate how the plots you've just created
# may look to people with colour blindness.

cvd_grid(gbh_plot)

Margot

## 1
# Manual colours by R colour name, keyed by country code.
a <- ggplot(
  data = occs_benelux_animals,
  mapping = aes(x = year, y = count, colour = country)
) +
  geom_point() +
  labs(
    x = "year",
    y = "number of occurrences",
    title = "GBIF occurrence records in the Benelux"
  ) +
  scale_y_log10() +
  # TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
  stat_smooth(method = "loess", se = TRUE, level = 0.95) +
  scale_color_manual(
    values = c("BE" = "black", "LU" = "indianred", "NL" = "skyblue")
  )

## 2
# The same colours as hex codes (printed directly, not stored).
ggplot(
  data = occs_benelux_animals,
  mapping = aes(x = year, y = count, colour = country)
) +
  geom_point() +
  labs(
    x = "year",
    y = "number of occurrences",
    title = "GBIF occurrence records in the Benelux"
  ) +
  scale_y_log10() +
  stat_smooth(method = "loess", se = TRUE, level = 0.95) +
  scale_color_manual(
    values = c("BE" = "#000000", "LU" = "#cd5c5c", "NL" = "#87ceeb")
  )

## 3
# Discrete viridis palette; every argument is written out explicitly.
b <- ggplot(
  data = occs_benelux_animals,
  mapping = aes(x = year, y = count, colour = country)
) +
  geom_point() +
  labs(
    x = "year",
    y = "number of occurrences",
    title = "GBIF occurrence records in the Benelux"
  ) +
  scale_y_log10() +
  stat_smooth(method = "loess", se = TRUE, level = 0.95) +
  scale_colour_viridis_d(
    name = waiver(),
    alpha = 1,
    begin = 0,
    end = 1,
    direction = 1,
    option = "D",
    aesthetics = "colour"
  )

## 4
# Palette picked through {paletteer}.
# NOTE(review): `c` as a variable name masks base::c() for readers; function
# calls such as c(...) still resolve to the base function. Consider renaming.
c <- ggplot(
  data = occs_benelux_animals,
  mapping = aes(x = year, y = count, colour = country)
) +
  geom_point() +
  labs(
    x = "year",
    y = "number of occurrences",
    title = "GBIF occurrence records in the Benelux"
  ) +
  scale_y_log10() +
  stat_smooth(method = "loess", se = TRUE, level = 0.95) +
  scale_color_paletteer_d("rcartocolor::BluYl")

## 5
# Colour-blindness simulations for the stored plots.
cvd_grid(a)
cvd_grid(b)
cvd_grid(c)

Jorre

# Base plot reused by every colour variant below: points and default smoother
# on a log10 y-axis, with both colour and fill mapped to country.
g <- ggplot(
  occs_benelux_animals,
  aes(x = year, y = count, colour = country, fill = country)
) +
  geom_point() +
  geom_smooth() +
  labs(
    title = "GBIF occurrence records in the Benelux",
    x = "year",
    y = "number of occurrences"
  ) +
  scale_y_log10()

# Set colors manually. Use "black" for Belgium, "skyblue" for The Netherlands
# and "indian red" for Luxembourg. Hint: look at the cheatsheet.

cols <- c(BE = "black", NL = "skyblue", LU = "indian red")
g +
  scale_colour_manual(values = cols) +
  scale_fill_manual(values = cols)

# A popular way of defining colours is by hex codes. Hex codes are a hash, #,
# followed by a combination of six characters - (digits 0 - 9, or letters A -
# F). There are many tools to pick colors and get their hex codes. For example,
# colorhexa: enter the color names used above to get their correspondent
# hexcodes. Create same plot using the hexcodes.

cols2 <- c(BE = "#000000", NL = "#87ceeb", LU = "#cd5c5c")
g +
  scale_colour_manual(values = cols2) +
  scale_fill_manual(values = cols2)


# You just created a 3-color palette: a vector with colors. Thousands of
# predefined palettes exist. Let's set the color automatically by using the
# famous viridis palette. The viridis palette is so famous that ggplot has its
# own set of functions for dealing with it.

g +
  scale_colour_viridis_d(option = "H") +
  scale_fill_viridis_d(option = "H")


# Use the R Graph Gallery's color palette finder to explore palettes
# interactively. Pick a palette and use it in your plot. Tip: you could need to
# use the {paletteer} package.

g2 <- g +
  paletteer::scale_colour_paletteer_d("lisa::OskarSchlemmer") +
  paletteer::scale_fill_paletteer_d("lisa::OskarSchlemmer")
g2


# Accessibility is important. Use the {colorblindr} package to simulate how the
# plots you've just created may look to people with colour blindness. Tip: check
# section "Accessible Choices" from Nicola Rennie's blogpost.

cvd_grid(g2)

Challenge 3A

Jorre

# Base plot for all kingdoms: points and default smoother per country on a
# log10 y-axis.
g <- ggplot(
  occs_benelux,
  aes(x = year, y = count, colour = country, fill = country)
) +
  geom_point() +
  geom_smooth() +
  labs(
    title = "GBIF occurrence records in the Benelux",
    x = "year",
    y = "number of occurrences"
  ) +
  scale_y_log10()

# One panel per kingdom, laid out in a single column ...
g + facet_wrap(vars(kingdom), ncol = 1)
# ... and laid out in two columns.
g + facet_wrap(vars(kingdom), ncol = 2)

Hanna

# p4: one panel per country (default layout), log10 y-axis, loess smoother
# with confidence interval.
p4 <- occs_benelux_animals |>
  ggplot(aes(x = year, y = count)) +
  geom_point() +
  facet_wrap(vars(country)) +
  ggtitle("GBIF occurrences records in the Benelux") +
  xlab("year") +
  ylab("number of occurrences") +
  scale_y_log10() +
  geom_smooth(method = "loess", se = TRUE)

# p5: the same plot with the panels arranged on a 2 x 2 grid.
p5 <- occs_benelux_animals |>
  ggplot(aes(x = year, y = count)) +
  geom_point() +
  facet_wrap(vars(country), nrow = 2, ncol = 2) +
  ggtitle("GBIF occurrences records in the Benelux") +
  xlab("year") +
  ylab("number of occurrences") +
  scale_y_log10() +
  geom_smooth(method = "loess", se = TRUE)

Lawrence

# Base plot for all kingdoms: points and loess smoother per country on a log10
# y-axis with manual country colours.
p <- ggplot(
  data = occs_benelux,
  mapping = aes(x = year, y = count, colour = country)
) +
  geom_point() +
  labs(
    x = "Year",
    y = "Number of occurrences",
    # The title is a single line: the run of spaces that a wrapped paste left
    # inside the string would otherwise show up in the rendered plot title.
    title = "GBIF occurrence records in the Benelux"
  ) +
  scale_y_log10() +
  geom_smooth(formula = "y ~ x", method = "loess", se = TRUE) +
  scale_colour_manual(
    values = c("black", "skyblue", "indianred"),
    limits = c("BE", "NL", "LU")
  )

# One panel per kingdom: as columns of a grid, and as a wrapped layout.
p + facet_grid(~kingdom)
p + facet_wrap(~kingdom)

Pieter 3A

    # Show scatter plots for each kingdom separately on one row.
# Kingdoms are placed in the columns of the facet grid so that all panels sit
# next to each other on one row. (`kingdom ~ .` would put them in rows, i.e.
# one column of stacked panels.)
occs_benelux %>%
  ggplot(mapping = aes(x = year,
                       y = count,
                       colour = country)
  ) +
  facet_grid(cols = vars(kingdom)) +
  geom_point()

# Show scatter plots for each kingdom separately: 2 rows and 2 columns.
occs_benelux %>%
  ggplot(mapping = aes(x = year,
                       y = count,
                       colour = country)
  ) +
  scale_y_log10() +
  facet_wrap(vars(kingdom), ncol = 2, nrow = 2) +
  geom_point()

Challenge 3B

Jorre

# Number of distinct species per phylum, order and IUCN Red List category,
# sorted from most to fewest species.
n_species_per_order_iucn <- species_be %>%
  group_by(phylum, order, iucnRedListCategory) %>%
  # .groups = "drop" returns an ungrouped result, so later steps do not
  # silently inherit the phylum/order grouping (and the regrouping message
  # from summarise() is avoided).
  summarise(n_species = n_distinct(species), .groups = "drop") %>%
  arrange(desc(n_species))

# Bars per order, in a grid of phylum (rows) by IUCN category (columns). The
# x-axis is free per column and its labels are rotated for readability.
n_species_per_order_iucn |>
  ggplot(aes(x = order, y = n_species, fill = iucnRedListCategory)) +
  geom_col() +
  facet_grid(phylum ~ iucnRedListCategory, scales = "free_x") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Falk

(a bit of a different attempt)

inspiration: https://www.data-to-viz.com/graph/treemap.html

# Treemap of the species counts: indexed by phylum and order, sized by
# n_species, coloured categorically by IUCN Red List category.
data_tree <- treemap::treemap(
  n_species_per_order_iucn,
  index = c("phylum", "order"),
  vSize = "n_species",
  vColor = "iucnRedListCategory",
  type = "categorical",
  algorithm = "pivotSize",
  border.lwds = 0.5
  # Options that were tried and left disabled:
  # sortID = "id_tree",
  # mirror.y = TRUE,
  # mirror.x = TRUE,
  # aspRatio = 5/3
)

Raïsa

# Count the rows available for each phylum / IUCN category combination.
n_species_per_order_iucn %>%
  group_by(phylum, iucnRedListCategory) %>%
  summarise(n = n())
# Only phylum == "Tracheophyta" and iucnRedListCategory == "NE" have
# observations in every cell, so we can split up the graph:

# Part 1: category "NE" only, one free-scaled panel per phylum.
n_species_per_order_iucn %>%
  dplyr::filter(iucnRedListCategory == "NE") %>%
  ggplot(aes(x = order, y = n_species)) +
  geom_col(position = "dodge") +
  labs(
    title = "Number of species per order for the IUCN red list category `NE`",
    x = "order",
    y = "number of species"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  facet_wrap(vars(phylum), scales = "free")


# Part 2: Tracheophyta only, one free-scaled row per IUCN category.
n_species_per_order_iucn %>%
  dplyr::filter(phylum == "Tracheophyta") %>%
  ggplot(aes(x = order, y = n_species)) +
  geom_col(position = "dodge") +
  labs(
    title = "Number of species for Tracheophyta in each IUCN red list category",
    x = "order",
    y = "number of species"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  facet_grid(rows = vars(iucnRedListCategory), scales = "free")

Pieter 3B

# Number of distinct species per phylum, order and IUCN Red List category,
# sorted from most to fewest species.
n_species_per_order_iucn <- species_be %>%
  group_by(phylum, order, iucnRedListCategory) %>%
  # .groups = "drop" returns an ungrouped tibble instead of one that is still
  # grouped by phylum and order.
  summarise(n_species = n_distinct(species), .groups = "drop") %>%
  arrange(desc(n_species))
n_species_per_order_iucn

# One free-scaled panel per phylum / IUCN category combination. The x labels
# are both dodged over two lines and rotated.
n_species_per_order_iucn %>%
  ggplot(mapping = aes(x = order,
                       y = n_species,
                       fill = iucnRedListCategory)) +
  facet_wrap(phylum ~ iucnRedListCategory,
             scales = "free",
             drop = TRUE) +
  scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Bonus Challenge

Falk's Favorite Plotting Library

Is ggplot the best choice?

Well, now you ask

import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib_venn import venn3

# load the data
species_be = pd.read_csv("data/20250325/20250325_species_in_BE.tsv", sep = "\t")
print(species_be.sample(3).T)

# list of the relevant boolean columns
region_columns = ["inFlanders", "inWallonia", "inBrussels"]

# we do not need to group, but here you go:
"""
n_species_region = species_be \
    .groupby(region_columns) \
    .count() \
    .loc[:, "species"] \
    .sort_values(ascending = False) \
    .reset_index()

print(n_species_region)
"""

# sets come natively in python: build one set of species names per region.
# Rows without a species name are dropped first, so that a missing value (NaN)
# does not end up in the sets and get counted as a species in the diagram.
region_sets = {
    col: set(species_be.loc[species_be[col].values, "species"].dropna())
    for col in region_columns
}
print(region_sets)

# there is a special library for venn plots, which integrates with matplotlib:
# `matplotlib_venn` (https://github.com/konstantint/matplotlib-venn)
venn3(
    [region_sets[recol] for recol in region_columns],
    # strip the leading "in" from the column names to get the region labels
    set_labels = [recol[2:] for recol in region_columns]
)
plt.show()

(https://drive.google.com/file/d/1yFer1iIxPCYcXRrPoGZlF9DHDUhmYHCs/view?usp=sharing)

;)

Thumbs up from Jorre ;-)

Raïsa's solution

## Option with VennDiagram ####
library(VennDiagram)

# Three-set Venn diagram of the species recorded in Flanders, Wallonia and
# Brussels, written to a PNG file. Each set holds the unique, non-missing
# species names flagged for that region.
venn.diagram(
  x = list(
    Flanders = species_be %>%
      dplyr::filter(inFlanders & !is.na(species)) %>%
      dplyr::pull(species) %>%
      unique(),
    Wallonia = species_be %>%
      dplyr::filter(inWallonia & !is.na(species)) %>%
      dplyr::pull(species) %>%
      unique(),
    Brussels = species_be %>%
      dplyr::filter(inBrussels & !is.na(species)) %>%
      dplyr::pull(species) %>%
      unique()
  ),
  # Output file and image settings
  filename = "src/20250325/venn.png",
  output = TRUE,
  imagetype = "png",
  height = 480,
  width = 480,
  resolution = 300,
  compression = "lzw",
  # Circles: outline width, outline colours and semi-transparent fills
  lwd = 1,
  col = c("#440154ff", "#21908dff", "#fde725ff"),
  fill = c(
    alpha("#440154ff", 0.3),
    alpha("#21908dff", 0.3),
    alpha("#fde725ff", 0.3)
  ),
  # Numbers inside the diagram
  cex = 0.5,
  fontfamily = "sans",
  # Set names (category labels)
  cat.cex = 0.3,
  cat.default.pos = "outer",
  cat.pos = c(-27, 27, 135),
  cat.dist = c(0.055, 0.055, 0.085),
  cat.fontfamily = "sans",
  cat.col = c("#440154ff", "#21908dff", "#fde725ff"),
  rotation = 1
)
Select a repo