# INBO CODING CLUB
16 December 2025
Welcome!
## Share your code snippet
If you want to share your code snippet, copy paste your snippet within a section of three backticks (```):
As an **example**:
```
library(tidyverse)
```
(*you can copy paste this example and add your code further down*)
## Yellow sticky notes
No yellow sticky notes online. Put your name + " | " and add a "*" each time you solve a challenge (see below).
## Participants
Name | Challenges
--- | ---
Damiano Oldoni | ***
Falk Mielke |
Hans Van Calster | *
Els Lommelen |
Anja Leyman |
Sanne Govaert | *
## interesting links and ideas
*add additional information here if you find it worth sharing*
*e.g.:*
`configr` - an R package to parse config files and store/read parameters for your project in text (a bit more readable than `json`):
+ https://miachol.r-universe.dev/configr
+ analogous in python: [→ `configParser`](https://docs.python.org/3/library/configparser.html)
Honorable mention to [→ `dbplyr`](https://dbplyr.tidyverse.org/articles/dbplyr.html):
+ facilitates database connection from R
+ SQLite can be used as a driver to store data in a database file on disk
+ via [`RSQLite`](https://rsqlite.r-dbi.org) / [`DBI`](https://dbi.r-dbi.org)
+ ... other databases might be more efficient
note on issues with
`readr::read_csv("https://zenodo.org/records/10054153/files/MH_ANTWERPEN-gps-2018.csv.gz?download=1")`, which throws the following error:
```
Error in vroom_(file, delim = delim %||% col_types$delim, col_names = col_names, :
embedded nul in string: '\037\ [...]
```
+ `?download=1` tells your browser to download the file instead of displaying it
+ but: `readr::read_csv` seems unable to work out the file type (a gzip-compressed CSV) when that query parameter is present
+ simply remove the `?download=1` part and `read_csv` will work
## Challenge 1
### Damiano's solution (example)
Copy paste this section to show your solutions.
```r
# Placeholder snippet: shows what a shared code block should look like.
example_message <- "This is how to insert code."
print(example_message)
```
### Falk's attempt (to be updated)
+ manually download <https://zenodo.org/records/10054153/files/MH_ANTWERPEN-reference-data.csv?download=1>
```r
library("readr")
library("magrittr")
library("lubridate")
library("here")
library("knitr")
# Project-relative location of the manually downloaded reference data.
mh_ref_path <- here::here(
  "data",
  "20251216/MH_ANTWERPEN-reference-data.csv"
)
# Location of the zipped GPS data, kept for reference (currently unused):
# mh_data_path_gz <- here::here("data", "20251216/20251216_MH_ANTWERPEN-gps-2018.csv.gz")
#' Read the MH_ANTWERPEN reference data with explicit column types
#'
#' @param mh_ref_path Location of the reference-data CSV file, passed on to
#'   `readr::read_csv()`.
#' @return The data frame returned by `readr::read_csv()`, with column names
#'   made syntactic by `make.names()` (hyphens become dots, e.g.
#'   `deploy.on.date`).
read_ref_data <- function(mh_ref_path) {
  ref_data <- readr::read_csv(
    mh_ref_path,
    # Spelled out in full (`col_types`) instead of relying on partial argument
    # matching. Data types manually assembled from `datapackage.json`.
    col_types = readr::cols(
      .default = readr::col_character(),
      # `%OS` accepts fractional seconds, `%S` only whole seconds.
      `deploy-on-date` = readr::col_datetime("%Y-%m-%d %H:%M:%OS"),
      `deploy-off-date` = readr::col_datetime("%Y-%m-%d %H:%M:%OS"),
      # Collector functions are used here: readr's string shortcuts are single
      # letters (e.g. "d"), so the string "double" is not a valid column type.
      `deploy-on-latitude` = readr::col_double(),
      `deploy-on-longitude` = readr::col_double(),
      `animal-mass` = readr::col_double(),
      `tag-mass` = readr::col_double()
    )
  )
  # Remove hyphens from the column names so they can be used without backticks.
  names(ref_data) <- make.names(names(ref_data), unique = TRUE)
  ref_data
}
ref_data <- read_ref_data(mh_ref_path)
# readr can unpack `.gz` files directly, so the call below would also run on
# the zipped file. However, that file is the GPS data file, which differs from
# the reference data read here.
# ref_data <- read_ref_data(mh_data_path_gz)

# Note: the hyphens in the column names are removed inside `read_ref_data()`.

# Earlier attempt, kept for reference (see ??lubridate::parsedatetime):
# parsing still does not work correctly with fractional seconds.
# ref_data %<>%
#   mutate(
#     deploy.on.date =
#       as.POSIXct(deploy.on.date, format = "%Y-%m-%d %H:%M:%OS") # this removes the milliseconds
#   )
# ref_data %>% pull(deploy.on.date) %>% .[[1]]

# `glimpse()` is namespaced because none of the packages attached in this
# snippet (readr, magrittr, lubridate, here, knitr) provides it.
ref_data %>% dplyr::glimpse()

# Show the first two records transposed, i.e. one row per column of the data.
ref_data %>%
  head(2) %>%
  t() %>%
  knitr::kable()
```
### Floris
```r
library(frictionless)
library(readr)
library(dplyr)
library(jsonlite)
# CHALLENGE 1A
# #############################################################################
# 1. read reference data

# Inspect the Data Package descriptor to look up the documented field types.
package <- read_package("https://zenodo.org/records/10054153/files/datapackage.json")
package
resources(package)
schema_ref <- get_schema(package, "reference-data")
str(schema_ref$fields)

ref_data_file <- "https://zenodo.org/records/10054153/files/MH_ANTWERPEN-reference-data.csv"

# First pass with guessed types, only to inspect readr's column specification.
ref_data_0 <- read_csv(ref_data_file)
spec(ref_data_0)

# Second pass with explicit column types.
ref_data <- read_csv(
  ref_data_file,
  col_types = cols(
    # `%OS` also accepts fractional seconds; `%S` only matches whole seconds
    # and would turn timestamps with a fractional part into parsing failures.
    `deploy-on-date` = col_datetime(format = "%Y-%m-%d %H:%M:%OS"),
    `deploy-off-date` = col_datetime(format = "%Y-%m-%d %H:%M:%OS"),
    `animal-mass` = col_double(),
    `deploy-on-latitude` = col_double(),
    `deploy-on-longitude` = col_double(),
    `tag-mass` = col_double(),
    .default = col_character()
  )
)
glimpse(ref_data)
# 2 read local zipped gps data
schema_gps <- get_schema(package, "gps")
str(schema_gps$fields)

gps_file_local <- "data/20251216/20251216_MH_ANTWERPEN-gps-2018.csv.gz"
gps_file_remote <- "https://zenodo.org/records/10054153/files/MH_ANTWERPEN-gps-2018.csv.gz"

# First pass with guessed types, only to inspect readr's column specification.
gps_data_0 <- read_csv(gps_file_local)
spec(gps_data_0)

# One column specification shared by the local and the remote read, so both
# data frames are parsed identically. Columns that are not listed are read as
# character.
gps_col_types <- cols(
  `event-id` = col_double(),
  visible = col_logical(),
  # `%OS` also accepts fractional seconds; `%S` only matches whole seconds.
  timestamp = col_datetime(format = "%Y-%m-%d %H:%M:%OS"),
  `location-long` = col_double(),
  `location-lat` = col_double(),
  `bar:barometric-pressure` = col_double(),
  `external-temperature` = col_double(),
  `gps:dop` = col_double(),
  `gps:satellite-count` = col_integer(),
  `gps-time-to-fix` = col_double(),
  `gps:vdop` = col_double(),
  `ground-speed` = col_double(),
  heading = col_double(),
  `height-above-msl` = col_double(),
  `import-marked-outlier` = col_logical(),
  `location-error-numerical` = col_double(),
  `manually-marked-outlier` = col_logical(),
  `vertical-error-numerical` = col_double(),
  `sensor-type` = col_character(),
  `individual-taxon-canonical-name` = col_character(),
  `tag-local-identifier` = col_character(),
  `individual-local-identifier` = col_character(),
  `study-name` = col_character(),
  .default = col_character()
)

gps_data <- read_csv(gps_file_local, col_types = gps_col_types)
glimpse(gps_data)

# 3 read remote zipped gps data
gps_data_url <- read_csv(gps_file_remote, col_types = gps_col_types)
glimpse(gps_data_url)

# compare with
read_resource(package, "gps") |> glimpse()
# CHALLENGE 1B
# #############################################################################
# 1. read datapackage.json with jsonlite
datapkg <- fromJSON(
  "https://zenodo.org/records/10054153/files/datapackage.json"
)
# Show the structure of the descriptor, limited to three nesting levels.
str(datapkg, max.level = 3)

# 2. download whole deposit
n2khab::download_zenodo(
  "10.5281/zenodo.10054153"
)
```
### Sanne's solution
```r
library(here)
# readr is attached because `col_character()`, `col_datetime()`, `col_double()`
# and `spec()` are called without a namespace prefix below.
library(readr)

# 1A.1
# Read the reference data with explicit column types: everything is character
# unless listed otherwise.
ref_data <- readr::read_csv(
  file = "https://zenodo.org/records/10054153/files/MH_ANTWERPEN-reference-data.csv",
  col_types = cols(
    .default = col_character(),
    # `%OS` accepts fractional seconds.
    `deploy-on-date` = col_datetime(format = "%Y-%m-%d %H:%M:%OS"),
    `deploy-off-date` = col_datetime(format = "%Y-%m-%d %H:%M:%OS"),
    `animal-mass` = col_double(),
    `deploy-on-latitude` = col_double(),
    `deploy-on-longitude` = col_double(),
    `tag-mass` = col_double()
  )
)
# Show the column specification that was actually applied.
spec(ref_data)
# 1A.2
# Local zipped GPS file; readr decompresses `.gz` files transparently.
gps_data <- readr::read_csv(
  here::here("data", "20251216", "20251216_MH_ANTWERPEN-gps-2018.csv.gz")
)
# 1A.3
# Same file (2018, without the `20251216_` prefix) read straight from Zenodo.
gps_data_url <- readr::read_csv(
  "https://zenodo.org/records/10054153/files/MH_ANTWERPEN-gps-2018.csv.gz"
)
# 1B.1
# Parse the Data Package descriptor of the deposit into an R list.
library(jsonlite)
datapackage <- jsonlite::fromJSON("https://zenodo.org/records/10054153/files/datapackage.json")

# 1B.2
# Download every file of the Zenodo deposit into the project's data folder.
library(inborutils)
zenodo_deposit <- inborutils::download_zenodo(
  doi = "10.5281/zenodo.10054153", path = here::here("data", "20251216"),
  parallel = TRUE, quiet = FALSE
)
```
### Hans
```r
# readr is attached because the column specification helpers (`cols()`,
# `col_*()`) are called without a namespace prefix below.
library(readr)

url_datapackage_json <- "https://zenodo.org/records/10054153/files/datapackage.json?download=1"
url_reference_data <- "https://zenodo.org/records/10054153/files/MH_ANTWERPEN-reference-data.csv?download=1"

# Collect name, type and format of every field of the first resource listed in
# the descriptor into one data frame.
colspecs <- jsonlite::read_json(url_datapackage_json)
colspecs <- colspecs$resources[[1]]$schema$fields
# A missing property is NULL in the parsed JSON, which would make data.frame()
# fail on differing lengths; replace it by NA instead.
or_na <- \(value) if (is.null(value)) NA_character_ else value
colspecs_df <- purrr::map_dfr(
  colspecs,
  \(x) data.frame(
    name = x$name,
    type = or_na(x$type),
    format = or_na(x$format)
  )
)
colspecs_df

# readr has no `%f` token; fractional seconds are parsed with `%OS`.
datetime_format <- "%Y-%m-%d %H:%M:%OS"

ref_data <- readr::read_csv(
  url_reference_data,
  col_types = cols(
    `tag-id` = col_double(),
    `animal-id` = col_character(),
    `animal-taxon` = col_character(),
    # Both deployment dates are parsed as datetimes while reading, so no
    # conversion afterwards is needed.
    `deploy-on-date` = col_datetime(format = datetime_format),
    `deploy-off-date` = col_datetime(format = datetime_format),
    `alt-project-id` = col_character(),
    `animal-comments` = col_character(),
    `animal-life-stage` = col_character(),
    `animal-mass` = col_double(),
    `animal-nickname` = col_character(),
    `animal-ring-id` = col_logical(),
    `animal-sex` = col_character(),
    `attachment-type` = col_character(),
    `deploy-on-latitude` = col_double(),
    `deploy-on-longitude` = col_double(),
    `deploy-on-measurements` = col_character(),
    `deployment-comments` = col_logical(),
    `deployment-end-type` = col_logical(),
    `deployment-id` = col_double(),
    `location-accuracy-comments` = col_character(),
    `manipulation-type` = col_character(),
    `study-site` = col_character(),
    `tag-firmware` = col_character(),
    `tag-manufacturer-name` = col_character(),
    `tag-mass` = col_double(),
    `tag-readout-method` = col_character(),
    `tag-serial-no` = col_double()
  )
)
ref_data
## Use readr package to read the GPS zipped file 20251216_MH_ANTWERPEN-gps-2018.csv.gz into R as a data frame called gps_data.
mh_antwerp_file <- here::here("data/20251216/20251216_MH_ANTWERPEN-gps-2018.csv.gz")
file.exists(mh_antwerp_file)
# Namespaced so this step does not depend on readr being attached.
gps_data <- readr::read_csv(
  file = mh_antwerp_file
)
## Use readr package to read the same file (without prefix 20251216_) from zenodo into R as a data frame called gps_data_url.
url_gps_data <- "https://zenodo.org/records/10054153/files/MH_ANTWERPEN-gps-2018.csv.gz?download=1"
# Strip the `?download=1` query string. The `?` is escaped so that only the
# literal suffix is removed (an unescaped `.` would match any character).
url_gps_data <- sub("\\?download=1$", "", url_gps_data)
gps_data_url <- readr::read_csv(url_gps_data)
# Challenge 1B
# Download all files of the deposit into the session's temporary directory.
test <- tempdir()
inborutils::download_zenodo(doi = "10.5281/zenodo.10054153", path = test, parallel = TRUE)
# if it fails, try parallel = FALSE
```
## Challenge 2
## Challenge 3