# INBO CODING CLUB March 23rd 2025 Welcome! ## Share your code snippet If you want to share your code snippet, copy paste your snippet within a section of three backticks (```): As an **example**: ``` library(tidyverse) ``` (*you can copy paste this example and add your code further down*) ## Yellow sticky notes No yellow sticky notes online. Put your name + " | " and add a "*" each time you solve a challenge (see below). ## Participants Name | Challenges --- | --- Raïsa Carmen | *** Jorre | Pieter Huybrechts | Falk Mielke | ** + Dirk Maes | *** Kaat Thienpont | Margot Vermeylen |* Isaac Vermeulen ## general Install `colorblindr`: ``` install.packages("remotes") install.packages("colorspace", repos = "http://R-Forge.R-project.org") remotes::install_github("clauswilke/colorblindr") ``` ([source](https://github.com/clauswilke/colorblindr)) ## Challenge 1 ### Raïsa's solution (dummy example) Copy paste this section to show your solutions. ```r # dummy code print("This is how to insert code.") ``` ### Jorre ```r occs_benelux_animals |> ggplot(aes(x=year,y=count,group=country,color=country)) + geom_point() + ggtitle('GBIF occurrence records in the Benelux') + xlab('year') + ylab('number of occurrences') + scale_y_continuous(trans='log10') + geom_smooth() occs_benelux_animals |> ggplot(aes(x=year,y=count,group=country,fill=country)) + geom_bar(stat='identity',position = 'dodge') + ggtitle('GBIF occurrence records in the Benelux') + xlab('year') + ylab('number of occurrences') + scale_y_continuous(trans='log10') ``` Put titles in separate function for reuse: ```r titles <- function(){ list( ggtitle('GBIF occurrence records in the Benelux'), xlab('year'), ylab('number of occurrences'), scale_y_continuous(trans='log10') ) } occs_benelux_animals |> ggplot(aes(x=year,y=count,group=country,color=country)) + geom_point() + geom_smooth() + titles() occs_benelux_animals |> ggplot(aes(x=year,y=count,group=country,fill=country)) + geom_bar(stat='identity',position = "dodge") + 
titles() ``` ### Falk's attempt ```r occs_benelux_animals %>% mutate_at(factor, .vars = vars(kingdom, country)) %>% ggplot(aes(x = year, y = count, color = country)) + geom_point(shape = 4) + geom_smooth(formula = 'y ~ x', method = "loess", se = TRUE) + # geom_bar(aes(fill = country), stat = "identity", position = "dodge") + labs(title = "GBIF occurrence records in the Benelux") + xlab("year") + ylab("number of occurrences") + scale_y_log10() + theme_minimal() ``` ## Hanna ```r p1 <- ggplot(occs_benelux_animals, aes(x = year, y = count)) + geom_point(aes(color = country))+#add title labs(title = "GBIF occurrences records in the Benelux", x="year",y="number of occurrences")+ scale_y_log10()+#color loess by country+show confidence interval geom_smooth(method = "loess", se = TRUE, aes(color = country)) p2 <- ggplot(occs_benelux_animals, aes(x = year, y = count,fill=country)) + geom_bar(stat="identity",position=position_dodge())+ labs(title = "GBIF occurrences records in the Benelux", x="year",y="number of occurrences")+ scale_y_log10() ``` ## Pieter ```r # Make a scatter plot (=points) with the number of occurrences (y) per year (x). # Distinguish the countries by color. # Add a title (e.g. "GBIF occurrence records in the Benelux") and labels for the # axes (e.g. "year" and "number of occurrences"). # To better represent the data, use a logarithmic scale for the y-axis. # Add a smoother to the plot. Use "loess" method and color the smoother by # country. Show the confidence interval (standard error). occs_benelux_animals %>% ggplot(mapping = aes( x = year, y = count, colour = country )) + ggtitle("GBIF occurrence records in the Benelux") + labs( x = "year", y = "number of occurrences" ) + scale_y_log10() + geom_smooth( method = "loess", se = TRUE, level = 0.95 ) + geom_point() # This kind of data could be also represented by a bar plot. Try to do it # (without smoother). The grammar of graphics makes this kind of change quite # easy, isn't it? What do you think about?
Which one is more informative: the # scatter plot or the bar plot? And why? occs_benelux_animals %>% ggplot(mapping = aes( x = year, y = count, fill = country )) + geom_col() occs_benelux_animals %>% ggplot(mapping = aes( x = year, y = count, fill = country )) + geom_bar(stat = "identity") ``` ## Dirk's attempt ```r p <- ggplot(occs_benelux_animals, aes(x = year, y = count, colour = country))+ geom_point(size = 2) + ggtitle("GBIF occurrence records in the Benelux") + xlab("year") + ylab("number of occurrences") + scale_y_log10() + geom_smooth(method = loess, se = TRUE) p # Bar plot p <- ggplot(occs_benelux_animals, aes(x = year, y = count, fill = country))+ geom_bar(stat = "identity", position = "dodge") + ggtitle("GBIF occurrence records in the Benelux") + xlab("year") + ylab("number of occurrences") + facet_wrap(~ country, scales = "free") p ``` ## Challenge 2 ### Falk's suggestion ```r country_colors <- c( "BE" = "#111111", # "black", "NL" = "#87CEEB", # "skyblue", "LU" = "#CD5C5C", # "indian red" "default" = "#FFFFFF" ) g <- occs_benelux_animals %>% mutate_at(factor, .vars = vars(kingdom, country)) %>% ggplot(aes(x = year, y = count, group = country, color = country)) + geom_point(shape = 4) + geom_smooth(formula = 'y ~ x', method = "loess", se = TRUE) + # geom_bar(aes(fill = country), stat = "identity") + labs(title = "GBIF occurrence records in the Benelux") + xlab("year") + ylab("number of occurrences") + scale_y_log10() + # scale_color_viridis_d() + # scale_color_manual(values = country_colors) + # scale_colour_paletteer_d("nbapalettes::supersonics_holiday") + scale_colour_paletteer_d("rockthemes::facelift") + theme_minimal() colorblindr::cvd_grid(g) ``` ### Lawrence ```r p <- ggplot(data = occs_benelux_animals, mapping = aes(x = year, y = count, colour = country)) + geom_point() + labs(x = "Year", y = "Number of occurrences", title = "GBIF occurrence records in the Benelux") + scale_y_log10() + geom_smooth(formula = 'y ~ x', method = "loess", se = TRUE) +
scale_colour_manual(values = c("black", "skyblue", "indianred"), limits = c("BE", "NL", "LU")) p p <- ggplot(data = occs_benelux_animals, mapping = aes(x = year, y = count, colour = country)) + geom_point() + labs(x = "Year", y = "Number of occurrences", title = "GBIF occurrence records in the Benelux") + scale_y_log10() + geom_smooth(formula = 'y ~ x', method = "loess", se = TRUE) + scale_colour_manual(values = c("#000000", "#87ceeb", "#cd5c5c"), limits = c("BE", "NL", "LU")) p p <- ggplot(data = occs_benelux_animals, mapping = aes(x = year, y = count, colour = country)) + geom_point() + labs(x = "Year", y = "Number of occurrences", title = "GBIF occurrence records in the Benelux") + scale_y_log10() + geom_smooth(formula = 'y ~ x', method = "loess", se = TRUE) + scale_colour_viridis_d() p p <- ggplot(data = occs_benelux_animals, mapping = aes(x = year, y = count, colour = country)) + geom_point() + labs(x = "Year", y = "Number of occurrences", title = "GBIF occurrence records in the Benelux") + scale_y_log10() + geom_smooth(formula = 'y ~ x', method = "loess", se = TRUE) + paletteer::scale_colour_paletteer_d("beyonce::X39") p cvd_grid(p) ``` ## Isaac ```r occs_benelux_animals |> ggplot(aes(x = year, y = count, group = country, color = country)) + geom_point() + geom_smooth() + ggtitle('GBIF occurrence records in the Benelux') + xlab('Year') + ylab('Number of occurrences') + scale_y_continuous(trans = 'log10') + scale_color_manual(values = c("black", "skyblue", "indianred"), limits = c("BE", "NL", "LU"), name = "Country", labels = c("BE", "NL", "LU")) occs_benelux_animals |> ggplot(aes(x = year, y = count, group = country, color = country)) + geom_point() + geom_smooth() + ggtitle('GBIF occurrence records in the Benelux') + xlab('Year') + ylab('Number of occurrences') + scale_y_continuous(trans = 'log10') + scale_color_manual(values = c("#000000", "#0000ff", "#cd5c5c"), limits = c("BE", "NL", "LU"), name = "Country", labels = c("BE", "NL", "LU")) occs_benelux_animals |>
ggplot(aes(x = year, y = count, group = country, color = country)) + geom_point() + geom_smooth() + ggtitle('GBIF occurrence records in the Benelux') + xlab('Year') + ylab('Number of occurrences') + scale_y_continuous(trans = 'log10') + scale_color_viridis_d(option = "plasma", name = "Country", labels = c("BE", "NL", "LU")) ``` ## Pieter ```r # Set colors manually. Use "black" for Belgium, "skyblue" for The Netherlands # and "indian red" for Luxembourg. occs_benelux_animals %>% ggplot(mapping = aes( x = year, y = count, colour = country )) + scale_y_log10() + scale_colour_manual( values = c(BE = "black", NL = "skyblue", LU = "sienna3") ) + geom_point() # Create same plot using the hexcodes. col_to_hex <- function(colour) rgb(t(col2rgb(colour)), maxColorValue = 255) occs_benelux_animals %>% ggplot(mapping = aes( x = year, y = count, colour = country )) + scale_y_log10() + scale_colour_manual( values = c(BE = col_to_hex("grey10"), NL = col_to_hex("steelblue"), LU = col_to_hex("indianred2")) ) + geom_point() # Let's set the color automatically by using the famous viridis palette. occs_benelux_animals %>% ggplot(mapping = aes( x = year, y = count, colour = country )) + scale_colour_viridis_d(option = "cividis") + theme_grey() + scale_y_log10() + geom_point() # Use the R Graph Gallery's color palette finder to explore palettes # interactively. Pick a palette and use it in your plot if (system.file(package = "wesanderson") == "") install.packages("wesanderson") gbh_plot <- occs_benelux_animals %>% ggplot(mapping = aes( x = year, y = count, colour = country )) + scale_colour_paletteer_d("wesanderson::GrandBudapest1") + scale_y_log10() + geom_point() gbh_plot # Use the {colorblindr} package to simulate how the plots you’ve just created # may look to people with colour blindness.
cvd_grid(gbh_plot) ``` ### Margot ```r ## 1 a <- ggplot(data = occs_benelux_animals, mapping = aes(x = year, y = count, colour = country)) + geom_point() + labs(x = "year", y = "number of occurrences", title = "GBIF occurrence records in the Benelux") + scale_y_log10() + stat_smooth(method = "loess", se = T, level = 0.95) + scale_color_manual( values = c("BE" = "black", "LU" = "indianred", "NL" = "skyblue")) ## 2 ggplot(data = occs_benelux_animals, mapping = aes(x = year, y = count, colour = country)) + geom_point() + labs(x = "year", y = "number of occurrences", title = "GBIF occurrence records in the Benelux") + scale_y_log10() + stat_smooth(method = "loess", se = T, level = 0.95) + scale_color_manual( values = c("BE" = "#000000", "LU" = "#cd5c5c", "NL" = "#87ceeb")) ## 3 b <- ggplot(data = occs_benelux_animals, mapping = aes(x = year, y = count, colour = country)) + geom_point() + labs(x = "year", y = "number of occurrences", title = "GBIF occurrence records in the Benelux") + scale_y_log10() + stat_smooth(method = "loess", se = T, level = 0.95) + scale_colour_viridis_d( name = waiver(), alpha = 1, begin = 0, end = 1, direction = 1, option = "D", aesthetics = "colour" ) ## 4 c <- ggplot(data = occs_benelux_animals, mapping = aes(x = year, y = count, colour = country)) + geom_point() + labs(x = "year", y = "number of occurrences", title = "GBIF occurrence records in the Benelux") + scale_y_log10() + stat_smooth(method = "loess", se = T, level = 0.95) + scale_color_paletteer_d("rcartocolor::BluYl") ## 5 cvd_grid(a) cvd_grid(b) cvd_grid(c) ``` ### Jorre ```r g <- occs_benelux_animals |> ggplot(aes(x=year,y=count,color=country,fill=country)) + geom_point() + geom_smooth() + ggtitle('GBIF occurrence records in the Benelux') + xlab('year') + ylab('number of occurrences') + scale_y_continuous(trans='log10') # Set colors manually. Use "black" for Belgium, "skyblue" for The Netherlands # and "indian red" for Luxembourg. Hint: look at the cheatsheet. 
cols <- c('BE'="black",'NL'="skyblue",'LU'="indian red") g + scale_color_manual(values=cols) + scale_fill_manual(values=cols) # A popular way of defining colours is by hex codes. Hex codes are an hash, #, # followed by a combination of six characters - (digits 0 - 9, or letters A - # F). There are many tools to pick colors and get their hex codes. For example, # colorhexa: enter the color names used above to get their correspondent # hexcodes. Create same plot using the hexcodes. cols2 <- c('BE'="#000000",'NL'="#87ceeb",'LU'="#cd5c5c") g + scale_color_manual(values=cols2) + scale_fill_manual(values=cols2) # You just created a 3-color palette: a vector with colors. Thousands of # predefined palettes exist. Let's set the color automatically by using the # famous viridis palette. The viridis palette is so famous that ggplot has its # own set of functions for dealing with it. g + scale_color_viridis_d(option='H') + scale_fill_viridis_d(option='H') # Use the R Graph Gallery's color palette finder to explore palettes # interactively. Pick a palette and use it in your plot. Tip: you could need to # use the {paletteer} package. g2 <- g + paletteer::scale_colour_paletteer_d("lisa::OskarSchlemmer") + paletteer::scale_fill_paletteer_d("lisa::OskarSchlemmer") g2 # Accessibility is important. Use the {colorblindr} package to simulate how the # plots you’ve just created may look to people with colour blindness. Tip: check # section "Accessible Choices" from Nicola Rennie's blogpost. 
cvd_grid(g2) ``` ## Challenge 3A ### Jorre ```r g <- occs_benelux |> ggplot(aes(x=year,y=count,color=country,fill=country)) + geom_point() + geom_smooth() + ggtitle('GBIF occurrence records in the Benelux') + xlab('year') + ylab('number of occurrences') + scale_y_continuous(trans='log10') g + facet_wrap(~kingdom,ncol=1) g + facet_wrap(~kingdom,ncol=2) ``` ### Hanna ```r p4 <- ggplot(occs_benelux_animals, aes(x = year, y = count)) + geom_point() + facet_wrap(~country)+ labs(title = "GBIF occurrences records in the Benelux", x="year",y="number of occurrences")+ scale_y_log10()+#color loess by country+show confidence interval geom_smooth(method = "loess", se = TRUE) p5 <- ggplot(occs_benelux_animals, aes(x = year, y = count)) + geom_point() + facet_wrap(~country, nrow = 2,ncol=2)+ labs(title = "GBIF occurrences records in the Benelux", x="year",y="number of occurrences")+ scale_y_log10()+#color loess by country+show confidence interval geom_smooth(method = "loess", se = TRUE) ``` ### Lawrence ```r p <- ggplot(data = occs_benelux, mapping = aes(x = year, y = count, colour = country)) + geom_point() + labs(x = "Year", y = "Number of occurrences", title = "GBIF occurrence records in the Benelux") + scale_y_log10() + geom_smooth(formula = 'y ~ x', method = "loess", se = TRUE) + scale_colour_manual(values = c("black", "skyblue", "indianred"), limits = c("BE", "NL", "LU")) p + facet_grid(~kingdom) p + facet_wrap(~kingdom) ``` ### Pieter 3A ```r # Show scatter plots for each kingdom separately on one row. occs_benelux %>% ggplot(mapping = aes(x = year, y = count, colour = country) ) + facet_grid(. ~ kingdom) + geom_point() # Show scatter plots for each kingdom separately: 2 rows and 2 columns.
occs_benelux %>% ggplot(mapping = aes(x = year, y = count, colour = country) ) + scale_y_log10() + facet_wrap(vars(kingdom), ncol = 2, nrow = 2) + geom_point() ``` ## Challenge 3B ### Jorre ```r n_species_per_order_iucn <- species_be %>% group_by(phylum, order, iucnRedListCategory) %>% summarise(n_species = n_distinct(species)) %>% arrange(desc(n_species)) n_species_per_order_iucn |> ggplot(aes(x=order,y=n_species,fill=iucnRedListCategory)) + geom_bar(stat='identity') + facet_grid(phylum~iucnRedListCategory,scales='free_x') + theme(axis.text.x = element_text(angle = 45, hjust = 1)) ``` ### Falk (a bit of a different attempt) inspiration: <https://www.data-to-viz.com/graph/treemap.html> ```r data_tree <- treemap::treemap( n_species_per_order_iucn, index=c("phylum", "order"), vSize="n_species", type="categorical", vColor = "iucnRedListCategory", algorithm = "pivotSize", # sortID = "id_tree", # mirror.y = TRUE, # mirror.x = TRUE, border.lwds = 0.5, # aspRatio = 5/3 ) ``` ### Raïsa ```r n_species_per_order_iucn %>% group_by(phylum, iucnRedListCategory) %>% summarize(n = n()) #there are only observations in each cell for phylum == "Tracheophyta" and for iucnRedListCategory == "NE" #We can split up the graph: n_species_per_order_iucn %>% dplyr::filter(iucnRedListCategory == "NE") %>% ggplot() + geom_col(aes(x = order, y = n_species), position = "dodge") + labs(title = "Number of species per order for the IUCN red list category `NE`", x = "order", y = "number of species" ) + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + facet_wrap(vars(phylum), scales = "free") n_species_per_order_iucn %>% dplyr::filter(phylum == "Tracheophyta") %>% ggplot() + geom_col(aes(x = order, y = n_species), position = "dodge") + labs(title = "Number of species for Tracheophyta in each IUCN red list category", x = "order", y = "number of species" ) + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + facet_grid(vars(iucnRedListCategory), scales = "free") ``` ## Pieter 3B ```r
n_species_per_order_iucn <- species_be %>% group_by(phylum, order, iucnRedListCategory) %>% summarise(n_species = n_distinct(species)) %>% arrange(desc(n_species)) n_species_per_order_iucn n_species_per_order_iucn %>% ggplot(mapping = aes(x = order, y = n_species, fill = iucnRedListCategory)) + facet_wrap(phylum ~ iucnRedListCategory, scales = "free", drop = TRUE) + scale_x_discrete(guide = guide_axis(n.dodge=2)) + geom_col() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) ``` ## Bonus Challenge ### Falks Favorite Plotting Library Is ggplot the best choice? Well, now you ask... ```python import numpy as np import pandas as pd import matplotlib as mpl import matplotlib.pyplot as plt from matplotlib_venn import venn3 # load the data species_be = pd.read_csv("data/20250325/20250325_species_in_BE.tsv", sep = "\t") print(species_be.sample(3).T) # list of the relevant boolean columns region_columns = ["inFlanders", "inWallonia", "inBrussels"] # we do not need to group, but here you go: """ n_species_region = species_be \ .groupby(region_columns) \ .count() \ .loc[:, "species"] \ .sort_values(ascending = False) \ .reset_index() print(n_species_region) """ # sets come natively in python region_sets = {} for col in region_columns: region_sets[col] = set(species_be.loc[species_be[col].values, "species"]) print(region_sets) # there is a special library for venn plots, which integrates with matplotlib: # `matplotlib_venn` (https://github.com/konstantint/matplotlib-venn) venn3( [region_sets[recol] for recol in region_columns], set_labels = [recol[2:] for recol in region_columns] ) plt.show() ``` (<https://drive.google.com/file/d/1yFer1iIxPCYcXRrPoGZlF9DHDUhmYHCs/view?usp=sharing>) ;) Thumbs up from Jorre ;-) ### Raïsa's solution ```r= ## Option with VennDiagram #### library(VennDiagram) venn.diagram( x = list( Flanders = species_be %>% dplyr::filter(inFlanders & !is.na(species)) %>% dplyr::pull(species) %>% unique(), Wallonia = species_be %>%
dplyr::filter(inWallonia & !is.na(species)) %>% dplyr::pull(species) %>% unique(), Brussels = species_be %>% dplyr::filter(inBrussels & !is.na(species)) %>% dplyr::pull(species) %>% unique()), filename = 'src/20250325/venn.png', output = TRUE , imagetype = "png" , height = 480 , width = 480 , resolution = 300, compression = "lzw", lwd = 1, col = c("#440154ff", '#21908dff', '#fde725ff'), fill = c(alpha("#440154ff", 0.3), alpha('#21908dff', 0.3), alpha('#fde725ff', 0.3)), cex = 0.5, fontfamily = "sans", cat.cex = 0.3, cat.default.pos = "outer", cat.pos = c(-27, 27, 135), cat.dist = c(0.055, 0.055, 0.085), cat.fontfamily = "sans", cat.col = c("#440154ff", '#21908dff', '#fde725ff'), rotation = 1) ```