owned this note
owned this note
Published
Linked with GitHub
# INBO CODING CLUB
24 June 2021
Welcome!
## Share your code snippet
If you want to share your code snippet, copy paste your snippet within a section of three backticks (```):
As an **example**:
```
library(tidyverse)
```
(*you can copy paste this example and add your code further down*)
## Yellow sticky notes
No yellow sticky notes online. Put your name + " | " and add a "*" each time you solve a challenge (see below).
## Participants
Name | Challenges
--- | ---
Damiano Oldoni | *
Raïsa Carmen | **
Lynn Pallemaerts |***
Emma Cartuyvels|**
Hans Van Calster | ***
Yasmine Verzelen |*
Adriaan Seynaeve|
Cécile Herr |**
Matthieu Chastel |*
Jasmijn Hillaert |
Amber Mertens |*
## Challenge 1
Emma:
```
read_obs <- function(species, year) {
## set scientific name to lowercase
species <- tolower(species)
## replace spaces with underscores
species <- str_replace_all(
species,
pattern = " ",
replacement = "_"
)
## compose filename
file_name <- paste0("20210624_", species, "_", year, ".txt")
## read file
read_tsv(paste0("./data/20210624/",
file_name))
}
```
Cécile:
```
get_obs <- function(myspecies, datereceived, myyear){
# format datereceived: character "yyyymmdd"
# (or maybe better to use prefix as a more general rule)
## set scientific name to lowercase
species <- tolower(myspecies)
## replace spaces with underscores
species <- str_replace_all(
species,
pattern = " ",
replacement = "_"
)
## compose filename
file_name <- paste0(datereceived, "_", species, "_", myyear ,".txt")
## read file
# probably I don't need to replace the date here in the path since I chose to work there
sp_year <- read_tsv(paste0("./data/20210624/",
file_name))
}
```
Hans:
```
get_obs <- function(species, year, prefix) {
## set scientific name to lowercase
species <- tolower(species)
## replace spaces with underscores
species <- stringr::str_replace_all(
species,
pattern = " ",
replacement = "_"
)
## compose filename
file_name <- paste0(prefix, "_", species, "_", year, ".txt")
## read file
observations <- readr::read_tsv(paste0("./data/20210624/",
file_name))
return(observations)
}
```
## Challenge 2
Emma:
```
#' STEP 2: data cleaning, remove imprecise or unverified observations, absences
#' Remove data with coordinate uncertainty higher than 1000 meters
clean_data <- function(df,
max_coord_uncertain = 1000,
issues_to_discard = c(
"ZERO_COORDINATE",
"COORDINATE_OUT_OF_RANGE",
"COORDINATE_INVALID",
"COUNTRY_COORDINATE_MISMATCH"
),
occurrenceStatus_to_discard = c(
"absent",
"excluded"
)) {
df <-
df %>%
filter(coordinateUncertaintyInMeters < max_coord_uncertain | is.na(coordinateUncertaintyInMeters))
#' Remove data with some issues
df <-
df %>%
filter(!issue %in% issues_to_discard)
#' Remove obs linked to absences or exclusions
df %>%
filter(!occurrenceStatus %in% occurrenceStatus_to_discard)
}
ha_2010 <- clean_data(ha_2010)
#' STEP 3: get the 1x1km EAA grid cellcode each observation belongs to and add
#' it to the observation data.frame
#' Step 3a: reprojection
assign_eea_cell <- function(df, longitude_colname, latitude_colname) {
df <- df %>%
mutate(x = !!sym(longitude_colname),
y = !!sym(latitude_colname))
df <- st_as_sf(df, coords = c("x", "y"), crs = 4326)
df <- st_transform(df, crs = 3035)
coords <- st_coordinates(df) %>%
as_tibble() %>%
rename(x_3035 = X,
y_3035= Y)
df <-
df %>%
bind_cols(coords)
## Step 3b: get the EAA grid cell code each observation belongs to and add it to
## the data.frame
df %>%
mutate(eea_cell_code = paste0(
"1km",
"E", floor(x_3035/1000),
"N", floor(y_3035/1000))) %>%
select(-c(x_3035, y_3035))
}
ha_2010 <- assign_eea_cell(ha_2010, "decimalLongitude", "decimalLatitude")
## Step 4: calculate number of observations of Harmonia axyridis in each grid cell
get_n_obs_per_cell <- function(df) {
df %>%
as_tibble() %>%
group_by(eea_cell_code) %>%
count()
}
n_obs_ha_2010 <- get_n_obs_per_cell(ha_2010)
#' STEP 5: read the EEA grid and get number of observations for each cell. Cells
#' without observations are removed
# Read EEA grid
be_grid <- st_read("./data/20210624/20210624_eea_1x1km_grid_BE.gpkg")
## add number of observations via inner_join (cells with no obs automatically
## excluded)
obs_to_grid <- function(grid, n_obs) {
grid %>%
inner_join(n_obs,
by = c("CELLCODE" = "eea_cell_code")
)
}
be_grid_n_obs_ha_2010 <- obs_to_grid(be_grid, n_obs_ha_2010)
```
Hans:
```
clean_data <- function(
df,
max_coord_uncertain,
issues_to_discard = c(
"ZERO_COORDINATE",
"COORDINATE_OUT_OF_RANGE",
"COORDINATE_INVALID",
"COUNTRY_COORDINATE_MISMATCH"
),
occurrenceStatus_to_discard = c(
"absent",
"excluded"
)) {
#' STEP 2: data cleaning, remove imprecise or unverified observations, absences
#' Remove data with coordinate uncertainty higher than 1000 meters
df <-
df %>%
filter(coordinateUncertaintyInMeters < max_coord_uncertain |
is.na(coordinateUncertaintyInMeters)) %>%
#' Remove data with some issues
filter(!issue %in% issues_to_discard) %>%
#' Remove obs linked to absences or exclusions
filter(!occurrenceStatus %in% occurrenceStatus_to_discard)
return(df)
}
assign_eea_cell <- function(
df,
longitude_colname,
latitude_colname) {
#' STEP 3: get the 1x1km EAA grid cellcode each observation belongs to and add
#' it to the observation data.frame
#' Step 3a: reprojection
df <- df %>%
mutate(x = !!sym(longitude_colname),
y = !!sym(latitude_colname))
df <- sf::st_as_sf(df, coords = c("x", "y"), crs = 4326)
df <- sf::st_transform(df, crs = 3035)
coords <- sf::st_coordinates(df) %>%
as_tibble() %>%
rename(x_3035 = X,
y_3035 = Y)
df <-
df %>%
bind_cols(coords)
## Step 3b: get the EAA grid cell code each observation belongs to and add it to
## the data.frame
df <-
df %>%
mutate(eea_cell_code = paste0(
"1km",
"E", floor(x_3035/1000),
"N", floor(y_3035/1000))) %>%
select(-c(x_3035, y_3035))
return(df)
}
get_n_obs_per_cell <- function(df) {
## Step 4: calculate number of observations of Harmonia axyridis in each grid cell
df %>%
as_tibble() %>%
group_by(eea_cell_code) %>%
count()
}
be_grid_n_obs <- function(df_n_obs) {
#' STEP 5: read the EEA grid and get number of observations for each cell. Cells
#' without observations are removed
# Read EEA grid
be_grid <- sf::read_sf(here::here("./data/20210624/20210624_eea_1x1km_grid_BE.gpkg"))
## add number of observations via inner_join (cells with no obs automatically
## excluded)
be_grid <- be_grid %>%
inner_join(df_n_obs,
by = c("CELLCODE" = "eea_cell_code")
)
return(be_grid)
}
```
Matthieu :
```
clean_data <- function(df,max_coord_uncertain,issues_to_discard,occurrenceStatus_to_discard){
#' STEP 2: data cleaning, remove imprecise or unverified observations, absences
#' Remove data with coordinate uncertainty higher than 1000 meters
df <-
df %>%
filter(coordinateUncertaintyInMeters < max_coord_uncertain | is.na(coordinateUncertaintyInMeters))
#' Remove data with some issues
df <-
df %>%
filter(!issue %in% issues_to_discard)
#' Remove obs linked to absences or exclusions
df <-
df %>%
filter(!occurrenceStatus %in% occurrenceStatus_to_discard)
return(df)
}
```
## INTERMEZZO
## Challenge 3
Hans:
```
visualize_obs_cells <- function(be_grid_data) {
require(leaflet)
require(htmltools)
require(sf)
## STEP 6: make a leaflet heatmap
pal <- colorBin(palette = "YlOrRd",
domain = be_grid_data$n)
labels <- sprintf(
"EEA cell: %s<br/>Species: Harmonia axyridis<br/>Number of observations: %g",
be_grid_data$CELLCODE,
be_grid_data$n) %>%
lapply(HTML)
leaflet(st_transform(be_grid_data, crs = 4326)) %>%
addTiles() %>%
addPolygons(
fillColor = ~pal(n),
weight = 1,
opacity = 1,
color = "purple",
fillOpacity = 0.7,
highlightOptions = highlightOptions(weight = 2,
color = "black",
fillOpacity = 1,
bringToFront = TRUE),
label = labels,
labelOptions = labelOptions(
style = list("font-weight" = "normal", padding = "3px 8px"),
textsize = "15px",
direction = "auto")) %>%
addLegend(title = "Harmonia axyridis (2010)<br>Number of observations",
pal = pal,
values = ~n)
}
make_report <- function(species, year, prefix) {
df <- get_obs(
species = species,
year = year,
prefix = prefix
)
df <- clean_data(df = df, max_coord_uncertain = 1000)
df <- assign_eea_cell(
df = df,
longitude_colname = "decimalLongitude" ,
latitude_colname = "decimalLatitude"
)
df_n_obs <- get_n_obs_per_cell(df)
be_grid_df_n_obs <- be_grid_n_obs(df_n_obs)
leaflet_map <- visualize_obs_cells(
be_grid_data = be_grid_df_n_obs)
return(
list(
df = df,
leaflet_map = leaflet_map
)
)
}
report <- make_report(species = "Harmonia axyridis",
year = 2010,
prefix = "20210624")
report$df
report$leaflet_map
```
## Bonus challenge