owned this note
owned this note
Published
Linked with GitHub
# INBO CODING CLUB
30 June 2022
Welcome!
## Share your code snippet
If you want to share your code snippet, copy paste your snippet within a section of three backticks (```):
As an **example**:
```
library(tidyverse)
```
(*you can copy paste this example and add your code further down*)
## Yellow sticky notes
No yellow sticky notes online. Put your name + " | " and add a "*" each time you solve a challenge (see below).
## Participants
Name | Challenges
--- | ---
Damiano Oldoni |
Dirk Maes |
An Leyssen |*
Hans Van Calster |
Emma Cartuyvels|**
Lynn Pallemaerts | ***
Teun Everts |
Sarah Broos|
Lucia Manzanares|
Matthieu |
## Challenge 1
Emma:
```
get_obs_2010 <- function(species){
## set scientific name to lowercase
species <- tolower(species)
## replace spaces with underscores
species <- str_replace_all(
species,
pattern = " ",
replacement = "_"
)
## compose filename
file_name <- paste0("20220630_", species, "_2010", ".txt")
## read file
read_tsv(paste0("./data/20220630/",
file_name))
}
ha_2010 <- get_obs_2010("Harmonia axyridis")
get_obs <- function(species, year){
## set scientific name to lowercase
species <- tolower(species)
## replace spaces with underscores
species <- str_replace_all(
species,
pattern = " ",
replacement = "_"
)
## compose filename
file_name <- paste0("20220630_", species, "_", year, ".txt")
## read file
read_tsv(paste0("./data/20220630/",
file_name))
}
ha_2011 <- get_obs("Harmonia axyridis", 2011)
```
Lynn:
```
get_obs <- function(species,
year) {
# standardize species name
sp <- species
sp <- tolower(sp)
sp <- str_replace_all(sp,
pattern = " ",
replacement = "_")
# compose filename
file_name <- paste0("20220630_", sp, "_", year, ".txt")
# read file
df <- read_tsv(paste0("./data/20220630/",
file_name))
return(df)
}
```
Sarah:
```
get_obs_2010<-function(species){
## set scientific name to lowercase
species <- tolower(species)
## replace spaces with underscores
species <- str_replace_all(
species,
pattern = " ",
replacement = "_")
## compose filename
file_name <- paste0("20220630_", species, "_2010", ".txt")
## read file
ha_2010 <- read_tsv(paste0("./data/20220630/",
file_name))
return (ha_2010)
}
get_obs_2010("Harmonia axyridis")
get_obs_2010("Harmonia axyridis")
get_obs<-function(species, year){
## set scientific name to lowercase
species <- tolower(species)
## replace spaces with underscores
species <- str_replace_all(
species,
pattern = " ",
replacement = "_")
## compose filename
file_name <- paste0("20220630_", species, "_", year, ".txt")
## read file
ha_year <- read_tsv(paste0("./data/20220630/",
file_name))
return (ha_year)
}
get_obs("Harmonia axyridis",2012)
```
## Challenge 2
Lynn:
```
clean_data <- function(df,
max_coord_uncertain = 1000,
issues_to_discard = c("ZERO_COORDINATE",
"COORDINATE_OUT_OF_RANGE",
"COORDINATE_INVALID",
"COUNTRY_COORDINATE_MISMATCH"),
occurrenceStatus_to_discard = c("absent",
"excluded")) {
df <- df %>%
filter(coordinateUncertaintyInMeters < max_coord_uncertain | is.na(coordinateUncertaintyInMeters)) %>%
filter(!issue %in% issues_to_discard) %>%
filter(!occurrenceStatus %in% occurrenceStatus_to_discard)
return(df)
}
assign_eea_cell <- function(df,
longitude_colname,
latitude_colname) {
# reprojection
df <- df %>%
mutate(x = !!sym(longitude_colname),
y = !!sym(latitude_colname))
df <- st_as_sf(df,
coords = c("x", "y"),
crs = 4326)
df <- st_transform(df,
crs = 3035)
coords <- st_coordinates(df) %>%
as_tibble() %>%
rename(x_3035 = X,
y_3035 = Y)
df <- df %>%
bind_cols(coords)
# get the EAA grid cell code
df <- df %>%
mutate(eea_cell_code = paste0("1km",
"E",
floor(x_3035/1000),
floor(y_3035/1000))) %>%
select(-c(x_3035, y_3035))
return(df)
}
get_n_obs_per_cell <- function(df) {
# calculate number of observations in each grid cell
res <- df %>%
as_tibble() %>%
group_by(eea_cell_code) %>%
count()
return(res)
}
join_to_grid <- function(df) {
# read EEA grid
be_grid <- st_read("./data/20220630/20220630_eea_1x1km_grid_BE.gpkg")
res <- be_grid %>%
inner_join(df, by = c("CELLCODE" = "eea_cell_code"))
return(res)
}
```
Hans (challenge 1 + 2):
- Gebruik van require() om packages in functie te laden
- assertthat om input variabelen te checken
```
#' Get the observation for a species in a given year
#'
#' @param species character string
#' @param year numeric year
#'
#' @return a dataframe
#' @export
#'
#' @examples
#' get_obs(species = "Harmonia axyridis", year = 2010)
get_obs <- function(
species,
year
) {
require(readr)
require(assertthat)
require(stringr)
assert_that(is.character(species))
assert_that(is.numeric(year))
## set scientific name to lowercase
species <- tolower(species)
## replace spaces with underscores
species <- str_replace_all(
species,
pattern = " ",
replacement = "_"
)
## compose filename
file_name <- paste0("20220630_", species, "_", year, ".txt")
## read file
species_data <- read_tsv(paste0("./data/20220630/",
file_name))
return(species_data)
}
#' Clean the data
#'
#' @param df a data.frame containing columns ...
#' @param coord_max_uncertainty max coordinate uncertainty in meters
#' @param remove_issues a character vector with valid issue names to be removed
#' @param remove_status a character vector with valid status names to be removed
#'
#' @return
#' @export
#'
#' @examples
clean_data <- function(
df,
coord_max_uncertainty = 1000,
remove_issues = c(
"ZERO_COORDINATE",
"COORDINATE_OUT_OF_RANGE",
"COORDINATE_INVALID",
"COUNTRY_COORDINATE_MISMATCH"
),
remove_status = c(
"absent",
"excluded"
)) {
require(dplyr)
require(assertthat)
assert_that(is.data.frame(df))
assert_that(is.character(remove_issues))
assert_that(is.character(remove_status))
# Remove data with coordinate uncertainty higher than 1000 meters
df_cleaned <-
df %>%
filter(coordinateUncertaintyInMeters < coord_max_uncertainty |
is.na(coordinateUncertaintyInMeters)) %>%
## Remove data with some issues
filter(!issue %in% remove_issues) %>%
## Remove obs linked to absences or exclusions
filter(!occurrenceStatus %in% remove_status)
return(df_cleaned)
}
#' Add spatial information EEA
#'
#' @param df
#' @param longitude_colname character, name of the column with longitude values
#' @param latitude_colname character, name of the column with latitude values
#'
#' @return
#' @export
#'
#' @examples
assign_eea_cell <- function(
df,
longitude_colname,
latitude_colname
) {
require(dplyr)
require(sf)
require(assertthat)
assert_that(is.data.frame(df))
assert_that(is.string(longitude_colname))
assert_that(is.string(latitude_colname))
## Step 3a: reprojection
df_coords <- df %>%
mutate(x = !!sym(longitude_colname),
y = !!sym(latitude_colname)) %>%
st_as_sf(coords = c("x", "y"), crs = 4326) %>%
st_transform(crs = 3035) %>%
st_coordinates() %>%
as_tibble() %>%
rename(x_3035 = X,
y_3035 = Y) %>%
bind_cols(df, .) %>%
## Step 3b: get the EAA grid cell code each observation belongs to and add it to
## the data.frame
mutate(eea_cell_code = paste0(
"1km",
"E", floor(x_3035/1000),
"N", floor(y_3035/1000))) %>%
select(-c(x_3035, y_3035))
return(df_coords)
}
#' Calculate number of observations per grid cell
#'
#' @param df a data.frame
#'
#' @return
#' @export
#'
#' @examples
get_n_obs_per_cell <- function(df) {
require(dplyr)
require(assertthat)
assert_that(is.data.frame(df))
df_count <-
df %>%
as_tibble() %>%
group_by(eea_cell_code) %>%
count()
return(df_count)
}
#' Add number of observation to a sf object of the EEA grid
#'
#' @param df a data.frame
#'
#' @return
#' @export
#'
#' @examples
add_n_obs_to_eea_grid <- function(df) {
require(sf)
require(dplyr)
require(tidyr)
## Read EEA grid
be_grid <- read_sf("./data/20220630/20220630_eea_1x1km_grid_BE.gpkg")
## add number of observations via inner_join (cells with no obs automatically
## excluded)
be_grid_n_obs <-
be_grid %>%
inner_join(df,
by = c("CELLCODE" = "eea_cell_code")
)
return(be_grid_n_obs)
}
```
## Challenge 3
Lynn:
```
visualize_obs_cells <- function(df,
species = "Harmonia axyridis",
year = 2010,
palette = colorBin(palette = "YlOrRd",
domain = df$n),
fill_op = 0.7) {
labels <- paste0("EEA cell: ",
df$CELLCODE,
"<br/>Species: ",
species,
"<br/>Number of observations: ",
df$n) %>%
lapply(HTML)
leaflet(st_transform(df,
crs = 4326)) %>%
addTiles() %>%
addPolygons(fillColor = ~palette(n),
weight = 1,
opacity = 1,
color = "purple",
fillOpacity = fill_op,
highlightOptions = highlightOptions(weight = 2,
color = "black",
fillOpacity = 1,
bringToFront = TRUE),
label = labels,
labelOptions = labelOptions(style = list("font-weight" = "normal",
padding = "3px 8px"),
textsize = "15px",
direction = "auto")) %>%
addLegend(title = paste0(species,
" (",
year,
")<br>Number of observations"),
pal = palette,
values = ~n)
}
```
# Challenge 1, 2 & 3
Hans Functions:
```
#' Get the observation for a species in a given year
#'
#' @param species character string
#' @param year numeric year
#'
#' @return a dataframe
#' @export
#'
#' @examples
#' get_obs(species = "Harmonia axyridis", year = 2010)
get_obs <- function(
species,
year
) {
require(readr)
require(assertthat)
require(stringr)
assert_that(is.character(species))
assert_that(is.numeric(year))
## set scientific name to lowercase
species <- tolower(species)
## replace spaces with underscores
species <- str_replace_all(
species,
pattern = " ",
replacement = "_"
)
## compose filename
file_name <- paste0("20220630_", species, "_", year, ".txt")
## read file
species_data <- read_tsv(paste0("./data/20220630/",
file_name))
return(species_data)
}
#' Clean the data
#'
#' @param df a data.frame containing columns ...
#' @param coord_max_uncertainty max coordinate uncertainty in meters
#' @param remove_issues a character vector with valid issue names to be removed
#' @param remove_status a character vector with valid status names to be removed
#'
#' @return
#' @export
#'
#' @examples
clean_data <- function(
df,
coord_max_uncertainty = 1000,
remove_issues = c(
"ZERO_COORDINATE",
"COORDINATE_OUT_OF_RANGE",
"COORDINATE_INVALID",
"COUNTRY_COORDINATE_MISMATCH"
),
remove_status = c(
"absent",
"excluded"
)) {
require(dplyr)
require(assertthat)
assert_that(is.data.frame(df))
assert_that(is.character(remove_issues))
assert_that(is.character(remove_status))
# Remove data with coordinate uncertainty higher than 1000 meters
df_cleaned <-
df %>%
filter(coordinateUncertaintyInMeters < coord_max_uncertainty |
is.na(coordinateUncertaintyInMeters)) %>%
## Remove data with some issues
filter(!issue %in% remove_issues) %>%
## Remove obs linked to absences or exclusions
filter(!occurrenceStatus %in% remove_status)
return(df_cleaned)
}
#' Add spatial information EEA
#'
#' @param df
#' @param longitude_colname character, name of the column with longitude values
#' @param latitude_colname character, name of the column with latitude values
#'
#' @return
#' @export
#'
#' @examples
assign_eea_cell <- function(
df,
longitude_colname,
latitude_colname
) {
require(dplyr)
require(sf)
require(assertthat)
assert_that(is.data.frame(df))
assert_that(is.string(longitude_colname))
assert_that(is.string(latitude_colname))
## Step 3a: reprojection
df_coords <- df %>%
mutate(x = !!sym(longitude_colname),
y = !!sym(latitude_colname)) %>%
st_as_sf(coords = c("x", "y"), crs = 4326) %>%
st_transform(crs = 3035) %>%
st_coordinates() %>%
as_tibble() %>%
rename(x_3035 = X,
y_3035 = Y) %>%
bind_cols(df, .) %>%
## Step 3b: get the EAA grid cell code each observation belongs to and add it to
## the data.frame
mutate(eea_cell_code = paste0(
"1km",
"E", floor(x_3035/1000),
"N", floor(y_3035/1000))) %>%
select(-c(x_3035, y_3035))
return(df_coords)
}
#' Calculate number of observations per grid cell
#'
#' @param df a data.frame
#'
#' @return
#' @export
#'
#' @examples
get_n_obs_per_cell <- function(df) {
require(dplyr)
require(assertthat)
assert_that(is.data.frame(df))
df_count <-
df %>%
as_tibble() %>%
group_by(eea_cell_code, species, year) %>%
count()
return(df_count)
}
#' Add number of observation to a sf object of the EEA grid
#'
#' @param df a data.frame
#' @param be_grid Belgium part of the EEA grid
#'
#' @return
#' @export
#'
#' @examples
add_n_obs_to_eea_grid <- function(df, be_grid) {
require(sf)
require(dplyr)
require(tidyr)
## add number of observations via inner_join (cells with no obs automatically
## excluded)
be_grid_n_obs <-
be_grid %>%
inner_join(df,
by = c("CELLCODE" = "eea_cell_code")
)
return(be_grid_n_obs)
}
#' Plot a leaflet map of number of observations of a species on the EEA grid
#'
#' @param sf_df
#'
#' @return
#' @export
#'
#' @examples
visualize_obs_cells <- function(sf_df) {
require(leaflet)
require(sf)
require(dplyr)
require(htmltools)
pal <- colorBin(palette = "YlOrRd",
domain = sf_df$n)
labels <- sprintf(
"EEA cell: %s<br/>Species: %s<br/>Number of observations: %g",
sf_df$CELLCODE,
sf_df$species,
sf_df$n) %>%
lapply(HTML)
plot <- leaflet(st_transform(sf_df, crs = 4326)) %>%
addTiles() %>%
addPolygons(
fillColor = ~pal(n),
weight = 1,
opacity = 1,
color = "purple",
fillOpacity = 0.7,
highlightOptions = highlightOptions(weight = 2,
color = "black",
fillOpacity = 1,
bringToFront = TRUE),
label = labels,
labelOptions = labelOptions(
style = list("font-weight" = "normal", padding = "3px 8px"),
textsize = "15px",
direction = "auto")) %>%
addLegend(title = paste0(sf_df$species[1], " (",
sf_df$year[1], ")<br>Number of observations"),
pal = pal,
values = ~n)
return(plot)
}
```
Hans workflow:
```
source("./src/20220630/20220630_functions.R")
ha_2010 <- get_obs(species = "Harmonia axyridis", year = 2010)
ha_2011 <- get_obs(species = "Harmonia axyridis", year = 2011)
cb_2010 <- get_obs(species = "Chorthippus biguttulus", year = 2010)
eea_be_grid <- read_sf("./data/20220630/20220630_eea_1x1km_grid_BE.gpkg")
ha_2010 %>%
clean_data() %>%
assign_eea_cell(longitude_colname = "decimalLongitude",
latitude_colname = "decimalLatitude")%>%
get_n_obs_per_cell() %>%
add_n_obs_to_eea_grid(be_grid = eea_be_grid) %>%
visualize_obs_cells()
```