owned this note
owned this note
Published
Linked with GitHub
# INBO CODING CLUB
29 November, 2018
Welcome!
## Share your code snippet
If you want to share your code snippet, copy and paste it between two lines of three backticks (```), i.e. in a fenced code block:
As an **example**:
```r
library(tidyverse)
...
```
(*you can copy paste this example and add your code further down, but do not fill in your code in this section*)
Your snippets:
## Challenge 1
### Solution with `filter` and `distinct`
```r
# Read the fish data, keep the columns of interest and restrict the records to
# one sampling location and three species. `year` is stored as a factor.
vis_data <- read_csv(file = "../data/20180426_visdata_cleaned.csv") %>%
  select(datum, meetpuntomschrijving, soort, aantal, gewicht) %>%
  filter(
    meetpuntomschrijving == "Zandplaat Kastel",
    soort %in% c("snoekbaars", "paling", "spiering")
  ) %>%
  mutate(year = factor(lubridate::year(datum)))

# Alphabetically sorted species that remain after filtering, returned as a list.
vis_data %>%
  distinct(soort) %>%
  arrange(soort) %>%
  as.list()
```
### Alternative use `semi_join`
```r
# Same selection as with filter(), expressed as a filtering join: only rows
# whose (meetpuntomschrijving, soort) pair occurs in the lookup tibble are kept.
vis_data <- read_csv(file = "../data/20180426_visdata_cleaned.csv") %>%
  select(datum, meetpuntomschrijving, soort, aantal, gewicht) %>%
  semi_join(
    # the single location is recycled over the three species
    tibble(
      meetpuntomschrijving = "Zandplaat Kastel",
      soort = c("snoekbaars", "paling", "spiering")
    ),
    by = c("meetpuntomschrijving", "soort")
  ) %>%
  mutate(year = factor(lubridate::year(datum)))
```
### Longer alternative for `distinct`
```r
# Long-hand way to list each species once: keep only the species column, sort
# it, group by species and keep the first row of every group.
# The result holds the same species values as distinct(soort) %>% arrange(soort)
# but is still grouped by `soort`.
vis_data %>%
select(soort) %>%
arrange(soort) %>%
group_by(soort) %>%
slice(1)
```
The alternative requires longer code and is about twice as slow (see the benchmark below):
```r
library(microbenchmark)
# Time both ways of listing the species present in `vis_data`.
# Each named argument is one expression to benchmark; the names ("distinct",
# "slice") are the labels shown in the `expr` column of the output.
microbenchmark(
# short version: distinct() followed by a sort
distinct = {
vis_data %>%
distinct(soort) %>%
arrange(soort)
},
# long version: sort, group by species and keep the first row per group
slice = {
vis_data %>%
select(soort) %>%
arrange(soort) %>%
group_by(soort) %>%
slice(1)
}
)
```
```
Unit: milliseconds
expr min lq mean median uq max neval cld
distinct 1.297765 1.373935 1.550536 1.430737 1.535608 3.061432 100 a
slice 2.868131 2.942646 3.277446 3.052135 3.216146 6.119257 100 b
```
###...
## Challenge 2
### Solution 1
```r
# Read the raw camera trap observations, keeping text columns as character.
camera_trap_data <- read.csv(
  file = "../data/20180123_observations_NPHK_cameratrapping.csv",
  stringsAsFactors = FALSE
)

# Convert a timestamp string to POSIXct.
# The format matches a literal double quote around the value and a literal
# trailing "Z". "Z" denotes UTC, so the time zone is set explicitly; without
# `tz`, strptime() would interpret the clock time in the local time zone.
parse_utc_timestamp <- function(x) {
  as.POSIXct(strptime(x, format = "\"%Y-%m-%dT%H:%M:%OSZ\"", tz = "UTC"))
}

ctd <- camera_trap_data %>%
  mutate(
    deploymentEnd = parse_utc_timestamp(deploymentEnd),
    deploymentStart = parse_utc_timestamp(deploymentStart),
    observationTimestamp = parse_utc_timestamp(observationTimestamp)
  )

# Observations of humans only.
ctd_humans <- ctd %>%
  filter(animalVernacularName == "Human")

# Number of humans observed per month, busiest month first.
# summarise() returns one row per month; mutate() would keep one row per
# observation and merely repeat the monthly total on each of them.
cpm <- ctd_humans %>%
  group_by(sequenceMonth) %>%
  summarise(humans_observed = sum(animalCount)) %>%
  arrange(desc(humans_observed))
```
### Solution 2
```r
library(lubridate)
# Read the camera trap observations, convert the three timestamp columns with
# lubridate::as_datetime() and keep the observations of humans.
ct <- read_csv(
  file = "../data/20180123_observations_NPHK_cameratrapping.csv"
) %>%
  mutate(
    deploymentStart = as_datetime(deploymentStart),
    deploymentEnd = as_datetime(deploymentEnd),
    observationTimestamp = as_datetime(observationTimestamp)
  ) %>%
  filter(animalVernacularName == "Human")

# Total number of humans observed per month, busiest month first.
ct %>%
  group_by(sequenceMonth) %>%
  summarise(humans_observed = sum(animalCount)) %>%
  arrange(desc(humans_observed))
```
### Solution 3
```r
camera_trap_data <- read_csv(
  file = "../data/20180123_observations_NPHK_cameratrapping.csv"
)

# Print date-times with millisecond precision. The previous option values are
# kept in `op` so they can be restored with options(op).
op <- options(digits.secs = 3)

# Convert the three timestamp columns to date-times.
camera_trap_data <- camera_trap_data %>%
  mutate(
    deploymentStart = lubridate::as_datetime(deploymentStart),
    deploymentEnd = lubridate::as_datetime(deploymentEnd),
    observationTimestamp = lubridate::as_datetime(observationTimestamp)
  )
head(camera_trap_data$deploymentStart)

# Number of observation records of humans per month, busiest month first.
# Every retained row is a human record, so counting rows with n() equals
# summing a logical "is human" flag over those rows.
camera_trap_data %>%
  filter(animalVernacularName == "Human") %>%
  group_by(sequenceMonth) %>%
  summarise(humans_observed = n()) %>%
  arrange(desc(humans_observed))
```
### Solve POSIX issue
```r
# Convert a timestamp string to POSIXct.
# The format matches a literal double quote around the value and a literal
# trailing "Z". "Z" denotes UTC, so the time zone is set explicitly; without
# `tz`, strptime() would interpret the clock time in the local time zone.
parse_utc_timestamp <- function(x) {
  as.POSIXct(strptime(x, format = "\"%Y-%m-%dT%H:%M:%OSZ\"", tz = "UTC"))
}

# Parse the three timestamp columns of the camera trap data.
camera_trap_data2 <- camera_trap_data %>%
  mutate(
    deploymentStart = parse_utc_timestamp(deploymentStart),
    deploymentEnd = parse_utc_timestamp(deploymentEnd),
    observationTimestamp = parse_utc_timestamp(observationTimestamp)
  )
```
### Shortest version
```r
# Add parsed copies of the deployment start and end as new columns; the
# original columns are left untouched.
camera_trap_data_01 <- camera_trap_data %>%
  mutate(
    deploymentStart01 = as_datetime(deploymentStart),
    deploymentEnd01 = as_datetime(deploymentEnd)
  )
```
## Challenge 3
### Solution 1
```r
# Read the fish data, drop records without length or weight, add the year.
vis_data <- read_csv(file = "../data/20180426_visdata_cleaned.csv") %>%
  filter(!is.na(lengte), !is.na(gewicht)) %>%
  mutate(year = lubridate::year(datum))

# Spherical density per fish: weight divided by the volume of a sphere whose
# diameter equals the fish length. The 5%, 50% and 95% quantiles are then
# taken over the individual fish per location, year and species.
vis_data %>%
  mutate(density = gewicht / (4 / 3 * pi * (lengte / 2)^3)) %>%
  group_by(meetpuntomschrijving, year, soort) %>%
  summarise(
    q05 = quantile(density, 0.05),
    q50 = median(density),
    q95 = quantile(density, 0.95)
  )
```
### Solution 2
```r
# load packages
library(tidyverse)
library(lubridate)
library(purrr)
# Read the csv file, drop records without length or weight and add the year.
vis_data <- read_csv(file = "../data/20180426_visdata_cleaned.csv") %>%
  filter(!is.na(lengte), !is.na(gewicht)) %>%
  mutate(year = lubridate::year(datum))
#' Quantiles of the fish spherical density distribution
#'
#' The spherical density of a fish is its weight divided by the volume of a
#' sphere whose diameter equals the fish length. The distribution doesn't take
#' into account the abundance of each species: the mean spherical density of
#' each species is calculated first, and the quantiles are then calculated over
#' those species means. Missing densities are ignored when averaging, and
#' species without any valid density are ignored when taking the quantiles.
#' @param df A data.frame containing at least the columns "soort", "gewicht"
#'   and "lengte".
#' @param probs Numeric vector of probabilities passed to `quantile()`.
#'   Defaults to `c(0, 0.5, 1)`, i.e. minimum, median and maximum.
#' @return A data.frame with one row per element of `probs` and two columns:
#'   `probs` (the requested probabilities) and `quantiles` (the matching
#'   quantiles of the species means).
#' @examples
#' library(tidyverse)
#' library(lubridate)
#' library(purrr)
#' vis_data <- read_csv(file = "../data/20180426_visdata_cleaned.csv")
#' vis_data %>%
#'   mutate(year = lubridate::year(datum)) %>%
#'   spherical_density_distribution()
#'
spherical_density_distribution <- function(df, probs = c(0, 0.5, 1)) {
  species_density <- df %>%
    group_by(soort) %>%
    summarize(
      # na.rm belongs to mean(); as an argument of summarize() it would only
      # create an extra column called `na.rm`
      density = mean(gewicht / (4 / 3 * pi * (lengte / 2)^3), na.rm = TRUE)
    ) %>%
    pull(density)
  data.frame(
    probs = probs,
    quantiles = quantile(species_density, probs = probs, na.rm = TRUE)
  )
}
# Apply spherical_density_distribution() to every combination of year and
# sampling point.
calc_quantiles <- vis_data %>%
# nest all columns except year and meetpuntnummer into the list-column `data`
# NOTE(review): this negative-selection form of nest() is the older tidyr
# interface - confirm it still behaves as intended with the installed version
nest(-year, -meetpuntnummer) %>%
# one data.frame of quantiles per nested data set
mutate(quantiles = map(data, spherical_density_distribution)) %>%
# expand the quantile data.frames back into regular rows
unnest(quantiles)
# print the result
calc_quantiles
```
### Solution 3
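Same result as Solution 2, but different from Solution 1. Which one is correct?
(Solution 1 takes the quantiles over the individual fish within each location, year and species; Solutions 2 and 3 first average the density per species and then take the quantiles over those species means within each year and location.)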
```r
# Mean spherical density per species, year and sampling point, followed by the
# minimum, median and maximum of those species means per year and sampling
# point.
vis_data %>%
  mutate(density = gewicht / (4 / 3 * pi * (lengte / 2)^3)) %>%
  group_by(soort, year, meetpuntnummer) %>%
  summarize(density = mean(density)) %>%
  group_by(year, meetpuntnummer) %>%
  summarize(
    q0 = min(density),
    q50 = median(density),
    q100 = max(density)
  )
```