March 23rd 2025
Welcome!
If you want to share your code snippet, copy paste your snippet within a section of three backticks (```):
As an example:
library(tidyverse)
(you can copy paste this example and add your code further down)
No yellow sticky notes online. Put your name + " | " and add a "*" each time you solve a challenge (see below).
Name | Challenges |
---|---|
Raïsa Carmen | *** |
Jorre | |
Pieter Huybrechts | |
Falk Mielke | ** + |
Dirk Maes | *** |
Kaat Thienpont | |
Margot Vermeylen | * |
Isaac Vermeulen | |
Install colorblindr:
# Install colorblindr together with the packages used to install it:
# remotes (provides install_github) and colorspace, which is taken from the
# R-Forge repository rather than the default repository.
install.packages("remotes")
install.packages("colorspace", repos = "http://R-Forge.R-project.org")
remotes::install_github("clauswilke/colorblindr")
(source)
Copy paste this section to show your solutions.
# dummy code
print("This is how to insert code.")
# Scatter plot: occurrence counts per year, coloured by country, on a log10
# y-axis, with a smoother per country drawn over the points.
occs_benelux_animals |>
  ggplot(aes(x = year, y = count, group = country, color = country)) +
  geom_point() +
  labs(
    title = "GBIF occurrence records in the Benelux",
    x = "year",
    y = "number of occurrences"
  ) +
  scale_y_log10() +
  geom_smooth()

# Bar plot of the same data: dodged bars filled by country.
occs_benelux_animals |>
  ggplot(aes(x = year, y = count, group = country, fill = country)) +
  geom_col(position = "dodge") +
  labs(
    title = "GBIF occurrence records in the Benelux",
    x = "year",
    y = "number of occurrences"
  ) +
  scale_y_log10()
Put titles in separate function for reuse:
# Shared plot decorations: title, axis labels and a log10 y-axis.
# Returns a list of ggplot components that can be added with `+ titles()`.
titles <- function() {
  plot_title <- ggtitle("GBIF occurrence records in the Benelux")
  x_label <- xlab("year")
  y_label <- ylab("number of occurrences")
  log_y_axis <- scale_y_continuous(trans = "log10")
  list(plot_title, x_label, y_label, log_y_axis)
}

# Scatter plot with smoother, decorated via titles().
occs_benelux_animals |>
  ggplot(aes(x = year, y = count, group = country, color = country)) +
  geom_point() +
  geom_smooth() +
  titles()

# Dodged bar plot, decorated via titles().
occs_benelux_animals |>
  ggplot(aes(x = year, y = count, group = country, fill = country)) +
  geom_col(position = "dodge") +
  titles()
# Scatter plot (crosses) with a loess smoother and confidence band per
# country, on a log10 y-axis.
occs_benelux_animals %>%
  # across() replaces the superseded mutate_at(): convert both columns to factor
  mutate(across(c(kingdom, country), factor)) %>%
  ggplot(aes(x = year, y = count, color = country)) +
  geom_point(shape = 4) +
  geom_smooth(formula = "y ~ x", method = "loess", se = TRUE) +
  # geom_bar(aes(fill = country), stat = "identity", position = "dodge") +
  labs(
    title = "GBIF occurrence records in the Benelux",
    x = "year",
    y = "number of occurrences"
  ) +
  scale_y_log10() +
  theme_minimal()
# p1: scatter plot with points and loess smoother both coloured by country.
p1 <- occs_benelux_animals |>
  ggplot(aes(x = year, y = count)) +
  geom_point(aes(color = country)) +
  # add title and axis labels
  labs(
    title = "GBIF occurrences records in the Benelux",
    x = "year",
    y = "number of occurrences"
  ) +
  scale_y_log10() +
  # color loess by country + show confidence interval
  geom_smooth(aes(color = country), method = "loess", se = TRUE)

# p2: dodged bar plot of the same data, filled by country.
p2 <- occs_benelux_animals |>
  ggplot(aes(x = year, y = count, fill = country)) +
  geom_col(position = position_dodge()) +
  labs(
    title = "GBIF occurrences records in the Benelux",
    x = "year",
    y = "number of occurrences"
  ) +
  scale_y_log10()
# Exercise: scatter plot (points) of the number of occurrences (y) per year
# (x), with the countries distinguished by colour.
# - Title and axis labels are set together in labs().
# - The y-axis is logarithmic to better represent the data.
# - A loess smoother is added, coloured by country, with its confidence
#   interval (standard error) shown; the points are drawn on top of it.
ggplot(
  data = occs_benelux_animals,
  mapping = aes(x = year, y = count, colour = country)
) +
  labs(
    title = "GBIF occurrence records in the Benelux",
    x = "year",
    y = "number of occurrences"
  ) +
  scale_y_log10() +
  geom_smooth(method = "loess", se = TRUE, level = 0.95) +
  geom_point()
# This kind of data could also be represented by a bar plot. Try to do it
# (without smoother). The grammar of graphics makes this kind of change quite
# easy, doesn't it? What do you think? Which one is more informative: the
# scatter plot or the bar plot? And why?

# Variant 1: geom_col() with bars filled by country.
ggplot(
  data = occs_benelux_animals,
  mapping = aes(x = year, y = count, fill = country)
) +
  geom_col()

# Variant 2: the same mapping drawn with geom_bar() and stat = "identity".
ggplot(
  data = occs_benelux_animals,
  mapping = aes(x = year, y = count, fill = country)
) +
  geom_bar(stat = "identity")
# Scatter plot: larger points coloured by country, log10 y-axis, loess
# smoother with confidence band.
p <- occs_benelux_animals |>
  ggplot(aes(x = year, y = count, colour = country)) +
  geom_point(size = 2) +
  labs(
    title = "GBIF occurrence records in the Benelux",
    x = "year",
    y = "number of occurrences"
  ) +
  scale_y_log10() +
  geom_smooth(method = loess, se = TRUE)
p

# Bar plot: one panel per country, each panel with its own axis ranges.
p <- occs_benelux_animals |>
  ggplot(aes(x = year, y = count, fill = country)) +
  geom_col(position = "dodge") +
  labs(
    title = "GBIF occurrence records in the Benelux",
    x = "year",
    y = "number of occurrences"
  ) +
  facet_wrap(vars(country), scales = "free")
p
# Manual colour lookup per country code; only referenced by the commented-out
# scale_color_manual() alternative below.
country_colors <- c(
  "BE" = "#111111", # "black",
  "NL" = "#87CEEB", # "skyblue",
  "LU" = "#CD5C5C", # "indian red"
  "default" = "#FFFFFF"
)

# Scatter plot (crosses) with loess smoother per country on a log10 y-axis,
# coloured with a paletteer palette.
g <- occs_benelux_animals %>%
  # across() replaces the superseded mutate_at(): convert both columns to factor
  mutate(across(c(kingdom, country), factor)) %>%
  ggplot(aes(x = year, y = count, group = country, color = country)) +
  geom_point(shape = 4) +
  geom_smooth(formula = "y ~ x", method = "loess", se = TRUE) +
  # geom_bar(aes(fill = country), stat = "identity") +
  labs(
    title = "GBIF occurrence records in the Benelux",
    x = "year",
    y = "number of occurrences"
  ) +
  scale_y_log10() +
  # scale_color_viridis_d() +
  # scale_color_manual(values = country_colors) +
  # scale_colour_paletteer_d("nbapalettes::supersonics_holiday") +
  scale_colour_paletteer_d("rockthemes::facelift") +
  theme_minimal()

# Show the plot under simulated colour-vision deficiencies.
colorblindr::cvd_grid(g)
# Four versions of the same scatter plot (points + loess smoother per country,
# log10 y-axis); only the colour scale differs between them.

# 1. Manual colours by name.
p <- occs_benelux_animals |>
  ggplot(aes(x = year, y = count, colour = country)) +
  geom_point() +
  labs(
    title = "GBIF occurrence records in the Benelux",
    x = "Year",
    y = "Number of occurrences"
  ) +
  scale_y_log10() +
  geom_smooth(formula = "y ~ x", method = "loess", se = TRUE) +
  scale_colour_manual(
    values = c("black", "skyblue", "indianred"),
    limits = c("BE", "NL", "LU")
  )
p

# 2. Manual colours by hex code.
p <- occs_benelux_animals |>
  ggplot(aes(x = year, y = count, colour = country)) +
  geom_point() +
  labs(
    title = "GBIF occurrence records in the Benelux",
    x = "Year",
    y = "Number of occurrences"
  ) +
  scale_y_log10() +
  geom_smooth(formula = "y ~ x", method = "loess", se = TRUE) +
  scale_colour_manual(
    values = c("#000000", "#87ceeb", "#cd5c5c"),
    limits = c("BE", "NL", "LU")
  )
p

# 3. Viridis palette.
p <- occs_benelux_animals |>
  ggplot(aes(x = year, y = count, colour = country)) +
  geom_point() +
  labs(
    title = "GBIF occurrence records in the Benelux",
    x = "Year",
    y = "Number of occurrences"
  ) +
  scale_y_log10() +
  geom_smooth(formula = "y ~ x", method = "loess", se = TRUE) +
  scale_colour_viridis_d()
p

# 4. Palette taken from {paletteer}.
p <- occs_benelux_animals |>
  ggplot(aes(x = year, y = count, colour = country)) +
  geom_point() +
  labs(
    title = "GBIF occurrence records in the Benelux",
    x = "Year",
    y = "Number of occurrences"
  ) +
  scale_y_log10() +
  geom_smooth(formula = "y ~ x", method = "loess", se = TRUE) +
  paletteer::scale_colour_paletteer_d("beyonce::X39")
p

# Simulate colour-vision deficiencies for the last plot.
cvd_grid(p)
# Scatter plot with smoother; colours set manually by name and the legend
# titled "Country".
occs_benelux_animals |>
  ggplot(aes(x = year, y = count, group = country, color = country)) +
  geom_point() +
  geom_smooth() +
  labs(
    title = "GBIF occurrence records in the Benelux",
    x = "Year",
    y = "Number of occurrences"
  ) +
  scale_y_log10() +
  scale_color_manual(
    name = "Country",
    limits = c("BE", "NL", "LU"),
    labels = c("BE", "NL", "LU"),
    values = c("black", "skyblue", "indianred")
  )
# Same scatter plot as with the named colours, now using hex codes:
# black (#000000) for BE, skyblue (#87ceeb) for NL, indianred (#cd5c5c) for LU.
occs_benelux_animals |>
  ggplot(aes(x = year, y = count, group = country, color = country)) +
  geom_point() +
  geom_smooth() +
  ggtitle("GBIF occurrence records in the Benelux") +
  xlab("Year") +
  ylab("Number of occurrences") +
  scale_y_continuous(trans = "log10") +
  scale_color_manual(
    # Hex codes must be quoted strings: an unquoted `#` starts a comment and
    # swallows the rest of the line, so the call no longer parses.
    values = c("#000000", "#87ceeb", "#cd5c5c"),
    limits = c("BE", "NL", "LU"),
    name = "Country",
    labels = c("BE", "NL", "LU")
  )
# Scatter plot with smoother, coloured with the viridis "plasma" palette.
occs_benelux_animals |>
  ggplot(aes(x = year, y = count, group = country, color = country)) +
  geom_point() +
  geom_smooth() +
  ggtitle("GBIF occurrence records in the Benelux") +
  xlab("Year") +
  ylab("Number of occurrences") +
  scale_y_continuous(trans = "log10") +
  scale_color_viridis_d(
    option = "plasma",
    name = "Country",
    # Fix the legend order explicitly. Passing only
    # labels = c("BE", "NL", "LU") relabels the default (alphabetical) order
    # BE, LU, NL by position and so swaps the LU and NL labels.
    limits = c("BE", "NL", "LU")
  )
# Set colors manually. Use "black" for Belgium, "skyblue" for The Netherlands
# and "indian red" for Luxembourg.
# NOTE(review): Luxembourg is drawn in "sienna3" below, not "indian red" —
# confirm whether that is intended.
ggplot(
  data = occs_benelux_animals,
  mapping = aes(x = year, y = count, colour = country)
) +
  scale_y_log10() +
  scale_colour_manual(
    values = c(BE = "black", NL = "skyblue", LU = "sienna3")
  ) +
  geom_point()
# Create same plot using the hexcodes.

#' Convert colour specifications to hex codes
#'
#' @param colour Colour specification(s) accepted by `col2rgb()`, e.g. colour
#'   names such as "skyblue".
#' @return Character vector of "#RRGGBB" hex codes, one per input colour.
col_to_hex <- function(colour) {
  # col2rgb() returns channels on the 0-255 scale, so the maximum passed to
  # rgb() must be 255; with 256 every non-zero channel can come out one step
  # too low (e.g. "white" became "#FEFEFE").
  rgb(t(col2rgb(colour)), maxColorValue = 255)
}
# Scatter plot on a log10 y-axis; the manual colours are passed as hex codes
# obtained from colour names via col_to_hex().
ggplot(
  data = occs_benelux_animals,
  mapping = aes(x = year, y = count, colour = country)
) +
  scale_y_log10() +
  scale_colour_manual(
    values = c(
      BE = col_to_hex("grey10"),
      NL = col_to_hex("steelblue"),
      LU = col_to_hex("indianred2")
    )
  ) +
  geom_point()
# Let's set the color automatically by using the famous viridis palette
# (here its "cividis" option).
ggplot(
  data = occs_benelux_animals,
  mapping = aes(x = year, y = count, colour = country)
) +
  scale_colour_viridis_d(option = "cividis") +
  theme_grey() +
  scale_y_log10() +
  geom_point()
# Use the R Graph Gallery's color palette finder to explore palettes
# interactively. Pick a palette and use it in your plot.

# Install {wesanderson} only when it is not available yet.
if (!nzchar(system.file(package = "wesanderson"))) {
  install.packages("wesanderson")
}

# Scatter plot on a log10 y-axis using the GrandBudapest1 palette.
gbh_plot <- ggplot(
  data = occs_benelux_animals,
  mapping = aes(x = year, y = count, colour = country)
) +
  scale_colour_paletteer_d("wesanderson::GrandBudapest1") +
  scale_y_log10() +
  geom_point()
gbh_plot

# Use the {colorblindr} package to simulate how the plots you’ve just created
# may look to people with colour blindness.
cvd_grid(gbh_plot)
## 1
# Manual colours by name. `se = TRUE` is spelled out: `T` is an ordinary,
# reassignable variable.
a <- ggplot(
  data = occs_benelux_animals,
  mapping = aes(x = year, y = count, colour = country)
) +
  geom_point() +
  labs(
    x = "year",
    y = "number of occurrences",
    title = "GBIF occurrence records in the Benelux"
  ) +
  scale_y_log10() +
  stat_smooth(method = "loess", se = TRUE, level = 0.95) +
  scale_color_manual(
    values = c("BE" = "black", "LU" = "indianred", "NL" = "skyblue")
  )

## 2
# Same plot with the colours given as hex codes (printed, not stored).
ggplot(
  data = occs_benelux_animals,
  mapping = aes(x = year, y = count, colour = country)
) +
  geom_point() +
  labs(
    x = "year",
    y = "number of occurrences",
    title = "GBIF occurrence records in the Benelux"
  ) +
  scale_y_log10() +
  stat_smooth(method = "loess", se = TRUE, level = 0.95) +
  scale_color_manual(
    values = c("BE" = "#000000", "LU" = "#cd5c5c", "NL" = "#87ceeb")
  )

## 3
# Viridis palette; the other arguments of the original call only restated the
# defaults, so just the palette option is kept.
b <- ggplot(
  data = occs_benelux_animals,
  mapping = aes(x = year, y = count, colour = country)
) +
  geom_point() +
  labs(
    x = "year",
    y = "number of occurrences",
    title = "GBIF occurrence records in the Benelux"
  ) +
  scale_y_log10() +
  stat_smooth(method = "loess", se = TRUE, level = 0.95) +
  scale_colour_viridis_d(option = "D")

## 4
# Palette from {paletteer}. Note: the variable is named `c`, like base::c();
# the name is kept because cvd_grid(c) below refers to it.
c <- ggplot(
  data = occs_benelux_animals,
  mapping = aes(x = year, y = count, colour = country)
) +
  geom_point() +
  labs(
    x = "year",
    y = "number of occurrences",
    title = "GBIF occurrence records in the Benelux"
  ) +
  scale_y_log10() +
  stat_smooth(method = "loess", se = TRUE, level = 0.95) +
  scale_color_paletteer_d("rcartocolor::BluYl")

## 5
# Simulate colour-vision deficiencies for the three stored plots.
cvd_grid(a)
cvd_grid(b)
cvd_grid(c)
# Base plot reused below: points and smoother, with both colour and fill
# mapped to country, on a log10 y-axis.
g <- occs_benelux_animals |>
  ggplot(aes(x = year, y = count, color = country, fill = country)) +
  geom_point() +
  geom_smooth() +
  labs(
    title = "GBIF occurrence records in the Benelux",
    x = "year",
    y = "number of occurrences"
  ) +
  scale_y_log10()

# Set colors manually. Use "black" for Belgium, "skyblue" for The Netherlands
# and "indian red" for Luxembourg. Hint: look at the cheatsheet.
cols <- c(BE = "black", NL = "skyblue", LU = "indian red")
g +
  scale_color_manual(values = cols) +
  scale_fill_manual(values = cols)

# A popular way of defining colours is by hex codes. Hex codes are a hash, #,
# followed by a combination of six characters (digits 0 - 9, or letters A -
# F). There are many tools to pick colors and get their hex codes. For example,
# colorhexa: enter the color names used above to get their corresponding
# hex codes. Create the same plot using the hex codes.
cols2 <- c(BE = "#000000", NL = "#87ceeb", LU = "#cd5c5c")
g +
  scale_color_manual(values = cols2) +
  scale_fill_manual(values = cols2)

# You just created a 3-color palette: a vector with colors. Thousands of
# predefined palettes exist. Let's set the color automatically by using the
# famous viridis palette. The viridis palette is so famous that ggplot has its
# own set of functions for dealing with it.
g +
  scale_color_viridis_d(option = "H") +
  scale_fill_viridis_d(option = "H")

# Use the R Graph Gallery's color palette finder to explore palettes
# interactively. Pick a palette and use it in your plot. Tip: you may need to
# use the {paletteer} package.
g2 <- g +
  paletteer::scale_colour_paletteer_d("lisa::OskarSchlemmer") +
  paletteer::scale_fill_paletteer_d("lisa::OskarSchlemmer")
g2

# Accessibility is important. Use the {colorblindr} package to simulate how the
# plots you’ve just created may look to people with colour blindness. Tip: check
# section "Accessible Choices" from Nicola Rennie's blogpost.
cvd_grid(g2)
# Base plot on the full occs_benelux data: points and smoother per country on
# a log10 y-axis.
g <- occs_benelux |>
  ggplot(aes(x = year, y = count, color = country, fill = country)) +
  geom_point() +
  geom_smooth() +
  labs(
    title = "GBIF occurrence records in the Benelux",
    x = "year",
    y = "number of occurrences"
  ) +
  scale_y_log10()

# One panel per kingdom, laid out in a single column ...
g + facet_wrap(vars(kingdom), ncol = 1)
# ... and laid out in two columns.
g + facet_wrap(vars(kingdom), ncol = 2)
# p4: one panel per country; points plus a loess smoother with confidence
# interval in each panel, log10 y-axis.
p4 <- occs_benelux_animals |>
  ggplot(aes(x = year, y = count)) +
  geom_point() +
  facet_wrap(vars(country)) +
  labs(
    title = "GBIF occurrences records in the Benelux",
    x = "year",
    y = "number of occurrences"
  ) +
  scale_y_log10() +
  geom_smooth(method = "loess", se = TRUE)

# p5: same plot with the panels arranged on a 2 x 2 grid.
p5 <- occs_benelux_animals |>
  ggplot(aes(x = year, y = count)) +
  geom_point() +
  facet_wrap(vars(country), nrow = 2, ncol = 2) +
  labs(
    title = "GBIF occurrences records in the Benelux",
    x = "year",
    y = "number of occurrences"
  ) +
  scale_y_log10() +
  geom_smooth(method = "loess", se = TRUE)
# Scatter plot with loess smoother and manual country colours on the full
# occs_benelux data.
p <- occs_benelux |>
  ggplot(aes(x = year, y = count, colour = country)) +
  geom_point() +
  labs(
    title = "GBIF occurrence records in the Benelux",
    x = "Year",
    y = "Number of occurrences"
  ) +
  scale_y_log10() +
  geom_smooth(formula = "y ~ x", method = "loess", se = TRUE) +
  scale_colour_manual(
    values = c("black", "skyblue", "indianred"),
    limits = c("BE", "NL", "LU")
  )

# Kingdoms as columns of a facet grid.
p + facet_grid(cols = vars(kingdom))
# Kingdoms as wrapped panels.
p + facet_wrap(vars(kingdom))
# Show scatter plots for each kingdom separately on one row.
occs_benelux %>%
  ggplot(mapping = aes(
    x = year,
    y = count,
    colour = country
  )) +
  # Kingdoms go in the columns of the grid so that all panels share one row;
  # `kingdom ~ .` would stack them in rows (a single column) instead.
  facet_grid(cols = vars(kingdom)) +
  geom_point()
# Show scatter plots for each kingdom separately: 2 rows and 2 columns.
ggplot(
  data = occs_benelux,
  mapping = aes(x = year, y = count, colour = country)
) +
  scale_y_log10() +
  facet_wrap(~kingdom, ncol = 2, nrow = 2) +
  geom_point()
# Number of distinct species per phylum, order and IUCN Red List category,
# sorted from most to fewest species.
n_species_per_order_iucn <- species_be %>%
  group_by(phylum, order, iucnRedListCategory) %>%
  # .groups = "drop" returns an ungrouped table instead of leaving it grouped
  # by phylum and order for every later step.
  summarise(n_species = n_distinct(species), .groups = "drop") %>%
  arrange(desc(n_species))

# Bars per order, filled by category, in a phylum x category grid of panels;
# x-axis labels are rotated for readability.
n_species_per_order_iucn |>
  ggplot(aes(x = order, y = n_species, fill = iucnRedListCategory)) +
  geom_bar(stat = "identity") +
  facet_grid(phylum ~ iucnRedListCategory, scales = "free_x") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
(a bit of a different attempt)
inspiration: https://www.data-to-viz.com/graph/treemap.html
# Treemap of the species counts: rectangles nested by phylum and order, sized
# by n_species and coloured by IUCN Red List category.
data_tree <- treemap::treemap(
  n_species_per_order_iucn,
  index = c("phylum", "order"),
  vSize = "n_species",
  type = "categorical",
  vColor = "iucnRedListCategory",
  algorithm = "pivotSize",
  # sortID = "id_tree",
  # mirror.y = TRUE,
  # mirror.x = TRUE,
  # No comma after the last active argument: with the options around it
  # commented out, a trailing comma would pass an empty argument to the call.
  border.lwds = 0.5
  # aspRatio = 5/3
)
# Count the rows available for each phylum x IUCN category combination.
n_species_per_order_iucn %>%
  group_by(phylum, iucnRedListCategory) %>%
  summarize(n = n())

# There are only observations in each cell for phylum == "Tracheophyta" and
# for iucnRedListCategory == "NE".
# We can split up the graph:

# Plot 1: category "NE" only, one panel per phylum.
n_species_per_order_iucn %>%
  dplyr::filter(iucnRedListCategory == "NE") %>%
  ggplot() +
  geom_col(mapping = aes(x = order, y = n_species), position = "dodge") +
  labs(
    x = "order",
    y = "number of species",
    title = "Number of species per order for the IUCN red list category `NE`"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  facet_wrap(vars(phylum), scales = "free")

# Plot 2: Tracheophyta only, one row of panels per IUCN category.
n_species_per_order_iucn %>%
  dplyr::filter(phylum == "Tracheophyta") %>%
  ggplot() +
  geom_col(mapping = aes(x = order, y = n_species), position = "dodge") +
  labs(
    x = "order",
    y = "number of species",
    title = "Number of species for Tracheophyta in each IUCN red list category"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  facet_grid(vars(iucnRedListCategory), scales = "free")
# Number of distinct species per phylum, order and IUCN Red List category,
# sorted from most to fewest species.
n_species_per_order_iucn <- species_be %>%
  group_by(phylum, order, iucnRedListCategory) %>%
  # .groups = "drop" returns an ungrouped table instead of leaving it grouped
  # by phylum and order.
  summarise(n_species = n_distinct(species), .groups = "drop") %>%
  arrange(desc(n_species))
n_species_per_order_iucn

# One panel per phylum x category combination, each with free scales; the
# x-axis labels are dodged over two rows and rotated.
n_species_per_order_iucn %>%
  ggplot(mapping = aes(
    x = order,
    y = n_species,
    fill = iucnRedListCategory
  )) +
  facet_wrap(phylum ~ iucnRedListCategory,
    scales = "free",
    drop = TRUE
  ) +
  scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Is ggplot the best choice?
Well, now you ask…
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib_venn import venn3
# load the data
species_be = pd.read_csv("data/20250325/20250325_species_in_BE.tsv", sep = "\t")
print(species_be.sample(3).T)

# list of the relevant boolean columns
region_columns = ["inFlanders", "inWallonia", "inBrussels"]

# we do not need to group, but here you go:
"""
n_species_region = species_be \
.groupby(region_columns) \
.count() \
.loc[:, "species"] \
.sort_values(ascending = False) \
.reset_index()
print(n_species_region)
"""

# sets come natively in python: one set of species names per region.
# Missing species names are dropped so that NaN is not counted as a species.
region_sets = {
    col: set(species_be.loc[species_be[col].values, "species"].dropna())
    for col in region_columns
}
print(region_sets)

# there is a special library for venn plots, which integrates with matplotlib:
# `matplotlib_venn` (https://github.com/konstantint/matplotlib-venn)
venn3(
    [region_sets[recol] for recol in region_columns],
    # strip the leading "in" from each column name to get the region label
    set_labels = [recol[2:] for recol in region_columns]
)
plt.show()
(https://drive.google.com/file/d/1yFer1iIxPCYcXRrPoGZlF9DHDUhmYHCs/view?usp=sharing)
;)
Thumbs up from Jorre ;-)
```
## Option with VennDiagram ####
library(VennDiagram)

# Three-set Venn diagram of the species found in Flanders, Wallonia and
# Brussels, written to a PNG file.
venn.diagram(
  # One vector of unique, non-missing species names per region.
  x = list(
    Flanders = species_be |>
      dplyr::filter(inFlanders & !is.na(species)) |>
      dplyr::pull(species) |>
      unique(),
    Wallonia = species_be |>
      dplyr::filter(inWallonia & !is.na(species)) |>
      dplyr::pull(species) |>
      unique(),
    Brussels = species_be |>
      dplyr::filter(inBrussels & !is.na(species)) |>
      dplyr::pull(species) |>
      unique()
  ),
  # Output file settings.
  filename = "src/20250325/venn.png",
  output = TRUE,
  imagetype = "png",
  height = 480,
  width = 480,
  resolution = 300,
  compression = "lzw",
  # Circle outlines and semi-transparent fills, one colour per region.
  lwd = 1,
  col = c("#440154ff", "#21908dff", "#fde725ff"),
  fill = c(
    alpha("#440154ff", 0.3),
    alpha("#21908dff", 0.3),
    alpha("#fde725ff", 0.3)
  ),
  # Text inside the diagram.
  cex = 0.5,
  fontfamily = "sans",
  # Region (category) labels.
  cat.cex = 0.3,
  cat.default.pos = "outer",
  cat.pos = c(-27, 27, 135),
  cat.dist = c(0.055, 0.055, 0.085),
  cat.fontfamily = "sans",
  cat.col = c("#440154ff", "#21908dff", "#fde725ff"),
  rotation = 1
)