INBO CODING CLUB

24 Feburary 2022

Welcome!

Share your code snippet

If you want to share your code snippet, copy paste your snippet within a section of three backticks (```):

As an example:

library(tidyverse)

(you can copy paste this example and add your code further down)

Yellow sticky notes

No yellow sticky notes online. Put your name + " | " and add a "*" each time you solve a challenge (see below).

Participants

Name Challenges
Damiano Oldoni ***
Emma Cartuyvels **
Matthieu Chastel
Hans Van Calster *
Vincent Smeekens *
Adriaan Seynaeve
Abhinav Malasi
Jasmijn Hillaert

Challenge 1

Emma:

# read data file  `20220224_gent_groeiperwijk.txt` containing the demographic
# evolution of Ghent districts from 1999 to 2009
gent_groeiperwijk <- read_delim("data/20220224/20220224_gent_groeiperwijk.txt",
           delim = ";",
           col_types = "cnnnnnnnnnnn",
           quote = "''")

# read sheet `ALL DATA` from Excel datafile `20220224_manta_2014_2015.xlsx` with
# manta related data

manta_2014_2015 <- read_excel("data/20220224/20220224_manta_2014_2015.xlsx",
           sheet = "ALL DATA",
           skip = 1,
           na = "/")

# Read butterfly related data file `20220224_butterflies_counts.txt` with
# semicolon (;) as delimiter, dot (.) as decimal mark and question mark (?) as
# NA. Read only the first 500 rows. Be sure column `Date` is parsed as
# datetime by using `format = "%d/%m/%Y %H:%M:%S"`.

butterflies_counts <- read_delim("data/20220224/20220224_butterflies_counts.txt",
           delim = ";",
           locale = locale(date_format = "%d/%m/%Y %H:%M:%S",
                           decimal_mark = "."),
           na = "?",
           n_max = 500)

Hans

library(readr)
library(readxl)
library(googlesheets4)
library(here)

## CHALLENGE 1

# read data file  `20220224_gent_groeiperwijk.txt` containing the demographic
# evolution of Ghent districts from 1999 to 2009
datapath <- here("data", "20220224")
gent_groei <- read_delim(
  here(datapath, "20220224_gent_groeiperwijk.txt"),
  quote = "'")

gent_groei

# read sheet `ALL DATA` from Excel datafile `20220224_manta_2014_2015.xlsx` with
# manta related data

manta <- read_excel(here(datapath, "20220224_manta_2014_2015.xlsx"),
                    sheet = "ALL DATA", skip = 1)

# Read butterfly related data file `20220224_butterflies_counts.txt` with
# semicolon (;) as delimiter, dot (.) as decimal mark and question mark (?) as
# NA. Read only the first 500 rows. Be sure column `Date` is parsed as
# datetime by using `format = "%d/%m/%Y %H:%M:%S"`.

butterflies <- read_delim(
  file = here(datapath, "20220224_butterflies_counts.txt"),
  delim = ";",
  na = "?",
  locale = locale(
    decimal_mark = ".",
    date_format = "%d/%m/%Y %H:%M:%S"),
  n_max = 500)

butterflies

Matthieu

gent_groeip <- read.csv2("./data/20220224/20220224_gent_groeiperwijk.txt")
Manta <- read_excel("./data/20220224/20220224_manta_2014_2015.xlsx", sheet = "ALL DATA")

Challenge 2

Hans


## CHALLENGE 2: googlesheets

# 1. Read the sheet `HyaHoVrBl` from a googlesheet about [hyacint coverage in Ename](https://docs.google.com/spreadsheets/d/1Tc8U-ud4dEcxgOojS80w7gdQQ-Ac85A4dN-u47NSmYg)(https://docs.google.com/spreadsheets/d/1Tc8U-ud4dEcxgOojS80w7gdQQ-Ac85A4dN-u47NSmYg) paying attention to import all columns correctly

hya <- read_sheet(
  ss = "1Tc8U-ud4dEcxgOojS80w7gdQQ-Ac85A4dN-u47NSmYg",
  sheet = "HyaHoVrBl",
  na = c("NA", "na", "NaN")) %>%
  janitor::clean_names()
hya

# Extra: column names contain spaces, let's get rid of them with janitor package
if (!"janitor" %in% installed.packages()) {
  install.packages("janitor")
}


# 2. Get metadata of the Google Sheet file

hya_meta <- sheet_properties(ss = "1Tc8U-ud4dEcxgOojS80w7gdQQ-Ac85A4dN-u47NSmYg")
hya_meta

# 3. But what if you don't want to allow tidyverse API to get access to your INBO
# Google account? Well, you can publish the sheet to the web as csv and then
# import it via `read_delim()` or `read_csv()`. Note: This method is useful if you
# don't have to import a lot of Googlesheets and they can be publicly available.
# Read the csv generated in this way by using this link:

hyacint_cov_link <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vTpyMEyzb_M8vrSFUSzTegR4czallWdSUsDCuivdUIwdy7g8PIfrEJtgGBIXPV2UKKRaIZxN7o2O085/pub?gid=1007047746&single=true&output=csv"

hya_csv <- read_csv(hyacint_cov_link,
                    na = c("NA", "na", "NaN")) %>%
  janitor::clean_names()

hya_csv

Emma:

ss = "https://docs.google.com/spreadsheets/d/1Tc8U-ud4dEcxgOojS80w7gdQQ-Ac85A4dN-u47NSmYg"

HyaHoVrBl <- read_sheet(ss,
           sheet = "HyaHoVrBl")

# Extra: column names contain spaces, let's get rid of them with janitor package
if (!"janitor" %in% installed.packages()) {
  install.packages("janitor")
}
library(janitor)

HyaHoVrBl <- clean_names(HyaHoVrBl)

# 2. Get metadata of the Google Sheet file

gs4_get(ss)

# 3. But what if you don't want to allow tidyverse API to get access to your INBO
# Google account? Well, you can publish the sheet to the web as csv and then
# import it via `read_delim()` or `read_csv()`. Note: This method is useful if you
# don't have to import a lot of Googlesheets and they can be publicly available.
# Read the csv generated in this way by using this link:

hyacint_cov_link <- read_delim("https://docs.google.com/spreadsheets/d/e/2PACX-1vTpyMEyzb_M8vrSFUSzTegR4czallWdSUsDCuivdUIwdy7g8PIfrEJtgGBIXPV2UKKRaIZxN7o2O085/pub?gid=1007047746&single=true&output=csv",
                               delim = ",")

Challenge 3

Hans

## CHALLENGE 3: locale

# You  get meteorological data from Wallonia (Waterloo) in Walloon! 1. Parse
# column `Hour` as a column containing number (by default read as a character
# column) 2. Parse column `AV Quality Code` as a column containing numbers, not
# as logicals 3. Parse the column `Date` as a date column. Hint: give a look to
# the readr's [`locale`
# vignette](https://readr.tidyverse.org/articles/locales.html) and check the
# structure of a typical date names locale, e.g. nl <- locale("nl")$date_names.
# This is a nice datetime [format specifications
# compendium](https://www.stat.berkeley.edu/~s133/dates.html)

days_walloon <- c("londi", "mårdi", "mierkidi", "djudi", "vénrdi",
  "semdi", "dimegne")
months_walloon <- c("djanvî","fevrî", "måss", "avri", "may", "djun", "djulete",
                    "awousse","setimbre", "octôbe", "nôvimbe", "decimbe")
days_abbr_walloon <- c("lon", "mår", "mie", "dju", "vén", "sem", "dim")
months_abbr_walloon <- c("djan","fev", "mås", "avr", "may", "djun", "djul",
                         "awou","set", "oct", "nôv", "dec")


walloon_names <- date_names(mon = months_walloon,
                      mon_ab = months_abbr_walloon,
                      day = days_walloon,
                      day_ab = days_abbr_walloon,
                      am_pm = c("AM", "PM"))

read_delim(file = here(datapath, "20220224_rainfall_waterloo.txt"),
           delim = ",", skip = 6,
           col_types = cols(Date = col_date("%a %d %B %Y"),
                            Hour = col_time("%H")),
           locale = locale(date_names = walloon_names,
                           grouping_mark = "'",
                           decimal_mark = ".",
                           date_format = "%a %d %B %Y")
           )


## CHALLENGE 3: encoding
# Read file 20220224_latin-1_character_set.txt

latin1 <- read_delim(here(datapath, "20220224_latin-1_character_set.txt"),
                     locale = locale(encoding = "LATIN1"))

iconvlist()
latin1
Encoding(latin1$Char)
latin1$Char[22]

# Read file 20220224_turkish_iso8859-9.txt. Use View() to open the data.frame.
# Do you get the same while printing the data.frame on Console? Why?

turkish <- read_delim(here(datapath, "20220224_turkish_iso8859-9_encoding.txt"),
                      locale = locale(encoding = "x-mac-turkish"))
#not working...

Select a repo