# INBO CODING CLUB 24 Feburary 2022 Welcome! ## Share your code snippet If you want to share your code snippet, copy paste your snippet within a section of three backticks (```): As an **example**: ``` library(tidyverse) ``` (*you can copy paste this example and add your code further down*) ## Yellow sticky notes No yellow sticky notes online. Put your name + " | " and add a "*" each time you solve a challenge (see below). ## Participants Name | Challenges --- | --- Damiano Oldoni | *** Emma Cartuyvels |** Matthieu Chastel | Hans Van Calster | * Vincent Smeekens | * Adriaan Seynaeve | Abhinav Malasi | Jasmijn Hillaert| ## Challenge 1 Emma: ``` # read data file `20220224_gent_groeiperwijk.txt` containing the demographic # evolution of Ghent districts from 1999 to 2009 gent_groeiperwijk <- read_delim("data/20220224/20220224_gent_groeiperwijk.txt", delim = ";", col_types = "cnnnnnnnnnnn", quote = "''") # read sheet `ALL DATA` from Excel datafile `20220224_manta_2014_2015.xlsx` with # manta related data manta_2014_2015 <- read_excel("data/20220224/20220224_manta_2014_2015.xlsx", sheet = "ALL DATA", skip = 1, na = "/") # Read butterfly related data file `20220224_butterflies_counts.txt` with # semicolon (;) as delimiter, dot (.) as decimal mark and question mark (?) as # NA. Read only the first 500 rows. Be sure column `Date` is parsed as # datetime by using `format = "%d/%m/%Y %H:%M:%S"`. butterflies_counts <- read_delim("data/20220224/20220224_butterflies_counts.txt", delim = ";", locale = locale(date_format = "%d/%m/%Y %H:%M:%S", decimal_mark = "."), na = "?", n_max = 500) ``` Hans ``` library(readr) library(readxl) library(googlesheets4) library(here) ## CHALLENGE 1 # read data file `20220224_gent_groeiperwijk.txt` containing the demographic # evolution of Ghent districts from 1999 to 2009 datapath <- here("data", "20220224") gent_groei <- read_delim( here(datapath, "20220224_gent_groeiperwijk.txt"), quote = "'") gent_groei # read sheet `ALL DATA` from Excel datafile `20220224_manta_2014_2015.xlsx` with # manta related data manta <- read_excel(here(datapath, "20220224_manta_2014_2015.xlsx"), sheet = "ALL DATA", skip = 1) # Read butterfly related data file `20220224_butterflies_counts.txt` with # semicolon (;) as delimiter, dot (.) as decimal mark and question mark (?) as # NA. Read only the first 500 rows. Be sure column `Date` is parsed as # datetime by using `format = "%d/%m/%Y %H:%M:%S"`. butterflies <- read_delim( file = here(datapath, "20220224_butterflies_counts.txt"), delim = ";", na = "?", locale = locale( decimal_mark = ".", date_format = "%d/%m/%Y %H:%M:%S"), n_max = 500) butterflies ``` Matthieu ``` gent_groeip <- read.csv2("./data/20220224/20220224_gent_groeiperwijk.txt") Manta <- read_excel("./data/20220224/20220224_manta_2014_2015.xlsx", sheet = "ALL DATA") ``` ## Challenge 2 Hans ``` ## CHALLENGE 2: googlesheets # 1. Read the sheet `HyaHoVrBl` from a googlesheet about [hyacint coverage in Ename](https://docs.google.com/spreadsheets/d/1Tc8U-ud4dEcxgOojS80w7gdQQ-Ac85A4dN-u47NSmYg)(https://docs.google.com/spreadsheets/d/1Tc8U-ud4dEcxgOojS80w7gdQQ-Ac85A4dN-u47NSmYg) paying attention to import all columns correctly hya <- read_sheet( ss = "1Tc8U-ud4dEcxgOojS80w7gdQQ-Ac85A4dN-u47NSmYg", sheet = "HyaHoVrBl", na = c("NA", "na", "NaN")) %>% janitor::clean_names() hya # Extra: column names contain spaces, let's get rid of them with janitor package if (!"janitor" %in% installed.packages()) { install.packages("janitor") } # 2. Get metadata of the Google Sheet file hya_meta <- sheet_properties(ss = "1Tc8U-ud4dEcxgOojS80w7gdQQ-Ac85A4dN-u47NSmYg") hya_meta # 3. But what if you don't want to allow tidyverse API to get access to your INBO # Google account? Well, you can publish the sheet to the web as csv and then # import it via `read_delim()` or `read_csv()`. Note: This method is useful if you # don't have to import a lot of Googlesheets and they can be publicly available. # Read the csv generated in this way by using this link: hyacint_cov_link <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vTpyMEyzb_M8vrSFUSzTegR4czallWdSUsDCuivdUIwdy7g8PIfrEJtgGBIXPV2UKKRaIZxN7o2O085/pub?gid=1007047746&single=true&output=csv" hya_csv <- read_csv(hyacint_cov_link, na = c("NA", "na", "NaN")) %>% janitor::clean_names() hya_csv ``` Emma: ``` ss = "https://docs.google.com/spreadsheets/d/1Tc8U-ud4dEcxgOojS80w7gdQQ-Ac85A4dN-u47NSmYg" HyaHoVrBl <- read_sheet(ss, sheet = "HyaHoVrBl") # Extra: column names contain spaces, let's get rid of them with janitor package if (!"janitor" %in% installed.packages()) { install.packages("janitor") } library(janitor) HyaHoVrBl <- clean_names(HyaHoVrBl) # 2. Get metadata of the Google Sheet file gs4_get(ss) # 3. But what if you don't want to allow tidyverse API to get access to your INBO # Google account? Well, you can publish the sheet to the web as csv and then # import it via `read_delim()` or `read_csv()`. Note: This method is useful if you # don't have to import a lot of Googlesheets and they can be publicly available. # Read the csv generated in this way by using this link: hyacint_cov_link <- read_delim("https://docs.google.com/spreadsheets/d/e/2PACX-1vTpyMEyzb_M8vrSFUSzTegR4czallWdSUsDCuivdUIwdy7g8PIfrEJtgGBIXPV2UKKRaIZxN7o2O085/pub?gid=1007047746&single=true&output=csv", delim = ",") ``` ## Challenge 3 Hans ``` ## CHALLENGE 3: locale # You get meteorological data from Wallonia (Waterloo) in Walloon! 1. Parse # column `Hour` as a column containing number (by default read as a character # column) 2. Parse column `AV Quality Code` as a column containing numbers, not # as logicals 3. Parse the column `Date` as a date column. Hint: give a look to # the readr's [`locale` # vignette](https://readr.tidyverse.org/articles/locales.html) and check the # structure of a typical date names locale, e.g. nl <- locale("nl")$date_names. # This is a nice datetime [format specifications # compendium](https://www.stat.berkeley.edu/~s133/dates.html) days_walloon <- c("londi", "mårdi", "mierkidi", "djudi", "vénrdi", "semdi", "dimegne") months_walloon <- c("djanvî","fevrî", "måss", "avri", "may", "djun", "djulete", "awousse","setimbre", "octôbe", "nôvimbe", "decimbe") days_abbr_walloon <- c("lon", "mår", "mie", "dju", "vén", "sem", "dim") months_abbr_walloon <- c("djan","fev", "mås", "avr", "may", "djun", "djul", "awou","set", "oct", "nôv", "dec") walloon_names <- date_names(mon = months_walloon, mon_ab = months_abbr_walloon, day = days_walloon, day_ab = days_abbr_walloon, am_pm = c("AM", "PM")) read_delim(file = here(datapath, "20220224_rainfall_waterloo.txt"), delim = ",", skip = 6, col_types = cols(Date = col_date("%a %d %B %Y"), Hour = col_time("%H")), locale = locale(date_names = walloon_names, grouping_mark = "'", decimal_mark = ".", date_format = "%a %d %B %Y") ) ## CHALLENGE 3: encoding # Read file 20220224_latin-1_character_set.txt latin1 <- read_delim(here(datapath, "20220224_latin-1_character_set.txt"), locale = locale(encoding = "LATIN1")) iconvlist() latin1 Encoding(latin1$Char) latin1$Char[22] # Read file 20220224_turkish_iso8859-9.txt. Use View() to open the data.frame. # Do you get the same while printing the data.frame on Console? Why? turkish <- read_delim(here(datapath, "20220224_turkish_iso8859-9_encoding.txt"), locale = locale(encoding = "x-mac-turkish")) #not working... ```