INBO CODING CLUB

# INBO CODING CLUB 28 February 2023 Welcome! ## Share your code snippet If you want to share your code snippet, copy paste your snippet within a section of three backticks (```): As an **example**: ``` library(tidyverse) ``` (*you can copy paste this example and add your code further down*) ## Yellow sticky notes No yellow sticky notes online. Put your name + " | " and add a "*" each time you solve a challenge (see below). ## Participants Name | Challenges --- | --- Damiano Oldoni | *** Pieter Huybrechts | **** Dirk Maes | *** Heleen Deroo | * An Leyssen | ** Wouter Depaepe | ** Raïsa Carmen | **** Nele Mullens | Arc'hantael Labriere | Zoé Hermans | Wim Kuypers | * Amber Mertens | *** Joppe Massant | ** Ioannis Rallis | ** Adriaan Seynaeve | *** ## Challenge 1 ### Pieter's solution ``` r # Display first 8 rows; display the last 8 rows. head(partridge,8) tail(partridge,8) ## dplyr equivalents, I'm not sure when you'd use one over the other slice_head(partridge,n = 8) slice_tail(partridge,n = 8) ## or you can even do this with square brackets! partridge[1:8,] partridge[{nrow(partridge)-7}:nrow(partridge),] # Select columns wbe and date. select(partridge,wbe,date) #tidy evaluation select(partridge,c(wbe,date)) # usin select(partridge,c("wbe","date")) #standard evaluation # Display the values of type. How many different values of type are? unique(pull(partridge,type)) length(unique(pull(partridge,type))) # data.table actually has a function for counting unique values data.table::uniqueN(partridge,"type") # you can also use distinct instead of unique, distinct is like unique but for # whole data.frames or columns partridge %>% select(type) %>% distinct %>% nrow # How to remove observations with type Sound? How to remove observations with # type Sound and wbe NA? filter(partridge, type != "Sound") ## you can keep adding filter expresssions filter(partridge, type != "Sound", !is.na(wbe)) ## or combine them filter(partridge, type != "Sound" & !is.na(wbe)) # Add a new column called seen which is TRUE if type is not "Sound". Add a # column month with the month of the observation. Tip: use lubridate::month() # function mutate(partridge, month = lubridate::month(date), seen = type != "Sound") ## or with case_when, useful for more complex cases ## I like to keep my assignment (<-) on a new line, so it's very clear I'm ## creating a new object (overwriting one in this case) partridge <- partridge %>% mutate(month = lubridate::month(date, label = TRUE)) %>% mutate(seen = case_when(type == "Sound" ~ FALSE, .default = TRUE)) # Place the column seen after type and month after date partridge %>% relocate(month, .after = date) %>% relocate(seen, .after = type) ## select also allows you to reorder columns, it'll return them in the same order ## as you request them, you can use this together with tidyselect select(partridge, date, month, year, wbe, fieldworker, type, seen, xlambert, ylambert) ``` ### An's solution ``` r # Display first 8 rows; display the last 8 rows. partridge %>% slice_head(n = 8) partridge %>% slice_tail(n = 8) # Select columns wbe and date. partridge %>% select(wbe, date) # Display the values of type. How many different values of type are? unique(partridge$type) partridge %>% distinct(type) # How to remove observations with type Sound? How to remove observations with type Sound and wbe NA? partridge %>% filter(!(type == "Sound")) partridge %>% filter(!(type == "Sound"), !is.na(wbe)) # Add a new column called seen which is TRUE if type is not "Sound". Add a column month with the month of the observation. Tip: use lubridate::month() function. Install lubridate package if not yet done. partridge_2 <- partridge %>% mutate(seen = ifelse(!(type == "Sound"), TRUE, FALSE), month = lubridate::month(date)) # Place the column seen after type and month after date partridge_3 <- partridge_2 %>% select(date, month, wbe, fieldworker, type, seen, xlambert:year) partridge_3 <- partridge_2 %>% relocate(seen, .after = type) %>% relocate(month, .after = date) ``` ### Amber's solution ```r ## CHALLENGE 1 # Display first 8 rows; display the last 8 rows. head(partridge, 8) tail(partridge, 8) # Select columns wbe and date. select(partridge, wbe, date) # Display the values of type. How many different values of type are? unique(partridge$type) # or: distinct(partridge, type) count(distinct(partridge, type)) # How to remove observations with type Sound? How to remove observations with type Sound and wbe NA? partridge %>% filter(type != "Sound") partridge %>% filter(type != "Sound", !is.na(wbe)) # Add a new column called seen which is TRUE if type is not "Sound". Add a column month with the month of the observation. Tip: use lubridate::month() function. Install lubridate package if not yet done. # Place the column seen after type and month after date partridge %>% mutate( seen = case_when( type != "Sound" ~ TRUE, TRUE ~ FALSE), month = lubridate::month(date, label = TRUE, abbr = FALSE) ) %>% relocate( seen, .after = type ) %>% relocate(month, .after = date) ``` ### Joppe's Solution ``` r ### 1. Display first 8 rows; display the last 8 rows. head(partridge, 8) tail(partridge, 8) ### 2. Select columns wbe and date. partridge$wbe partridge$date ### 3. Display the values of type. How many different values of type are? distinct(partridge, type) count(distinct(partridge, type)) ### 4. How to remove observations with type Sound? How to remove observations with type Sound and wbe NA? filter(partridge, !(type == 'Sound')) filter(partridge, !(type == 'Sound') & !is.na(wbe)) partridge <- mutate(partridge, seen=!(type == 'Sound')) partridge <- mutate(partridge, month=month(date)) partridge ### 5. # Relocate was wellicht cleaner col_order <- c("date", "month", "wbe", "fieldworker", "type", "seen", "xlambert", "ylambert", "year") partridge <- partridge[, col_order] partridge ``` ### Arc'hantael's solution ``` r # 1- Display the first 8 rows; display the last 8 rows head(partridge, 8) tail(partridge, 8) # 2- Select columns wbe and date partridge %>% select(wbe, date) # 3- display the values of type. How many different values of type are there? partridge %>% count(type) # 4- How to remove observations with type Sound? How to remove observations with # type Sound and wbe NA? partridge %>% filter(type != "Sound", wbe != "NA") # 5- Add a new column called seen which is TRUE if type is not "Sound". Add a # new column month with the month of the observation. Tip: use lubridate::month # function. partridge %>% mutate(seen = case_when(type != "Sound" ~ TRUE), month = month(date)) ``` ## Challenge 2 ### Pieter's solution ``` r # How many observations per WBE? Save it as overview_partridge as we will use it # in challenge 3. overview_partridge <- partridge %>% group_by(wbe) %>% tally # How many observations per WBE and type? partridge %>% group_by(wbe, type) %>% tally # Order the previous output per n_obs (descending order), wbe (ascending order) # and type (alphabetically). partridge %>% group_by(wbe, type) %>% tally(name = "n_obs") %>% # or add_tally() or add_count() arrange(-n_obs,wbe,type) # For each month, return the wbe with the highest number of observations and # report the number of observations for those wbe that month. ## filter can actually group internally, summarise is probably easier to read? partridge %>% group_by(wbe, month) %>% tally(name = "n_obs") %>% ungroup() %>% filter(n_obs == max(n_obs), .by = month) ``` ### Nele's solution ``` r ## 2.1 How many observations per WBE? Save it as overview_partridge as we will use it in challenge 3. overview_partridge <- count(partridge, wbe) ## 2.2 How many observations per WBE and type? count(partridge, wbe, type) %>% View() ## 2.3 Order the previous output per n_obs (descending order), ## wbe (ascending order) and type (alphabetically). partridge %>% group_by(wbe, type) %>% count(name = "n_obs") %>% arrange(desc(n_obs), wbe, type) ``` ### Amber's solution ``` r ## CHALLENGE 2 - Summaries # How many observations per WBE? Save it as overview_partridge as we will use it in challenge 3. overview_partridge <- partridge %>% group_by(wbe) %>% summarise(n_obs = n()) # How many observations per WBE and type? # Order the previous output per n_obs (descending order), wbe (ascending order) and type (alphabetically). partridge %>% group_by(wbe, type) %>% summarise(n_obs = n()) %>% arrange(desc(n_obs), wbe, type, .by_group = FALSE) # For each month, return the wbe with the highest number of observations and report the number of observations for those wbe that month. partridge %>% group_by(month, wbe) %>% summarise(n_obs = n()) %>% arrange(month, desc(n_obs)) %>% slice(1) %>% ungroup() ``` ### Joppe's solution ``` r ### 1. How many observations per WBE? Save it as overview_partridge as we will use it in challenge 3. overview_partridge <- count(partridge, wbe) overview_partridge ### 2. How many observations per WBE and type? count(partridge, wbe, type) ### 3. Order the previous output per n_obs (descending order), wbe (ascending order) and type (alphabetically). partridge %>% count(wbe, type) %>% arrange(desc(n), wbe, type) ### 4. For each month, return the wbe with the highest number of observations and report the number of observations for those wbe that month. partridge %>% group_by(month, wbe) %>% count() %>% group_by(wbe) %>% summarise(n = max(n)) ``` ## INTERMEZZO ## Challenge 3 ### Raisa ```r WBE <- read_csv("./data/20230228/20230228_wbe_info.txt", na = "") overview_partridge %>% left_join(WBE, by = join_by(wbe)) WBE %>% right_join(overview_partridge, by = join_by(wbe)) WBE %>% anti_join(partridge, by = "wbe") overview_partridge %>% inner_join(WBE, by = join_by(wbe)) overview_partridge %>% full_join(WBE, by = join_by(wbe)) %>% replace_na(list(n_obs = 0)) ``` ### Pieter's solution ```r # read the game management units details WBE <- read_csv("./data/20230228/20230228_wbe_info.txt", na = "") # Add WBE details in WBE to overview_partridge (where available) ## I don't need to tell it to join by what column, as the data.frames only have ## one column with the same name overview_partridge %>% left_join(WBE, by = "wbe") # How are the columns ordered? Are the columns of overview_partridge on the # left? Try to put them on the right. inner_join(WBE, overview_partridge, keep = TRUE) %>% filter(!is.na(n)) # keep only partridge wbe rows ## this approach will drop rows with no wbe (with wbe is NA) ## check first if you actually have a match in WBE for all overview_partridge right_join(WBE, overview_partridge) ## I prefer reordering columns after a join in a seperate step overview_partridge %>% left_join(WBE, by = "wbe") %>% select(colnames(WBE), colnames(overview_partridge)) # Which WBEs are not in partridge, i.e. are not linked to any observation? anti_join(WBE, partridge) %>% pull(wbe) %>% unique # You don't need to use joins to get to this answer setdiff(pull(WBE,wbe), pull(partridge,wbe)) # Some observations have a missing value for column wbe and some WBEs have no # observations at all. Get rid of them while joining. inner_join(WBE, overview_partridge) # Now, join both again but retain ALL EXISTING WBEs. Set the number of # observations equal to 0 for all WBEs where no partridges were observed. full_join(overview_partridge, WBE) %>% mutate(n = case_when(is.na(n) ~ 0, .default = n)) %>% filter(!is.na(wbe)) ``` ### Nele's solution ``` # 3.1 Add WBE details in WBE to overview_partridge (where available) overview_partridge <- left_join(overview_partridge, WBE, by = "wbe") # 3.2 How are the columns ordered? Are the columns of overview_partridge on the left? Try to put them on the right. #overview_partridge <- overview_partridge %>% relocate(wbe, .after = province) %>% relocate(n, .after = wbe) WBE %>% right_join(overview_partridge, by = join_by(wbe)) #or right_join(WBE, overview_partridge, by = "wbe") %>% view() ``` ## Bonus challenge 1 ### Pieter's solution ```r # if there are a lot of columns to pivot, you can use tidyselect pivot_longer( ias, cols = c("english_name", "dutch_name", "french_name"), names_to = "language", values_to = "vernacular_name" ) %>% # regex magic is also possible instead mutate(language = stringr::str_remove(language, stringr::fixed("_name"))) %>% mutate(language = dplyr::case_match(language, "english" ~ "en", "dutch" ~ "nl", "french" ~ "fr")) ``` ### Raisa ```r ias <- ias %>% pivot_longer(cols = 2:4, values_to = "vernacular_name", names_to = "language", names_pattern = "(.*)_name") %>% mutate(language = ifelse(language == "dutch", "nl", str_sub(language, 1, 2))) ``` ## Bonus challenge 2 ### Example code Amber Stukje code wat ik een tijd geleden geschreven heb toen ik pas net met R begon te werken, grote kans dat er dus ruimte is voor verbetering. ``` r # Minimal reproducible example for coding club 20230228 # starting from a dataframe (result from other part of the process) with surface area per vegetation sub-categories for two different years and the difference in the area for these two years # goal is to summarize these values per simplified/main categories (and removing NAs). # Names of these simplified categories are given in a separate vector # and column names are given as variables (because it had to be executed for many different year combinations) # data df <- structure(list(Code = c("A", "B1", "B2", "I1", "I2", "I3", "I4", "U1", "U2", "W"), Habitat = c("Akker", "Wilg", "Berk", "Riet", "Lisdodde", "Mix Riet & Lisdodde", "Heen", "Ruigte type 1", "Ruigte type 2 ", "Water"), `2013` = c(NA, 0.049591440864666, NA, 0.042735889211246, 0.248358365784165, 0.0977590620561381, NA, NA, 0.559824776974386, 3.68812422563596), `2017` = c(NA, 0.049591440864666, NA, 0.042735889211246, 0.248358589051378, 0.0977590620561381, NA, NA, 0.352468819308082, 3.68812409798934), Verschil = c(NA, 0, NA, 0, 2.23267212828837e-07, 0, NA, NA, -0.207355957666304, -1.27646623937494e-07)), row.names = c(NA, -10L), class = "data.frame") categories_simple = c( "A" = "Akker", "B" = "Bos & struweel", "I" = "Riet", "M" = "Moeras", "P" = "Pionier", "U" = "Ruigte", "W" = "Water", "Z" = "Open zand" ) year1 <- "2013" year2 <- "2017" df %>% # get rid of na rows filter(!is.na(year1) & !is.na(year2) & !is.na(Verschil)) %>% # take only the letter part of the code using regex (to go from sub-categories to main categories) mutate(code_simple = str_extract(Code, "[A-Za-z]+")) %>% # group by main category and summarize the surface area values group_by(code_simple) %>% summarise(!!as.name(year1) := sum(!!as.name(year1)), !!as.name(year2) := sum(!!as.name(year2)), Verschil = sum(Verschil)) %>% # change the code of main categories to the names found in the vector given above mutate(Hoofdeenheid = recode(code_simple, !!!categories_simple)) %>% # change the position of the new column relocate(Hoofdeenheid, .after = code_simple) %>% # remove the code column select(-code_simple) ```