# INBO CODING CLUB 28 April 2022 Welcome! ## Share your code snippet If you want to share your code snippet, copy paste your snippet within a section of three backticks (```): As an **example**: ``` library(tidyverse) ``` (*you can copy paste this example and add your code further down*) ## Yellow sticky notes No yellow sticky notes online. Put your name + " | " and add a "*" each time you solve a challenge (see below). ## Participants Raïsa | ** Yasmine | * Amber | ** ## Challenge 1 - Display the very first 8 rows. Display the very last 8 rows. veg_df %>% slice_head(n = 8) veg_df %>% slice_tail(n = 8) - Display columns Q1Code and Q1Description veg_df_Q1 <- veg_df %>% select(Q1Code, Q1Description) veg_df_Q1 - Display the distinct values of Q1Code and Q1Description. Do the same for Q2Code and Q2Description. veg_df_Q1_distinct <- veg_df %>% distinct(Q1Code, Q1Description) veg_df_Q1_distinct veg_df_Q2_distinct <- veg_df %>% distinct(Q2Code, Q2Description) veg_df_Q2_distinct - Count how many data are reliable and how many are not (NotSure = 0 or 1). veg_df %>% count(NotSure) veg_df %>% group_by(NotSure) %>% count() - How to remove not reliable data? veg_df <- veg_df %>% filter(NotSure == 0) ## Challenge 2 All Q2Description and all Name values contain the string "31xx_". Please, remove it from both columns. data %>% mutate(across(c(Q2Description, Name), ~str_remove(., "31xx_"))) %>% View() veg_df <- veg_df %>% mutate(Name = str_remove(Name, "31xx_"), Q2Description = str_remove(Q2Description, "31xx_")) Select rows where UserReference contains HGA, LIL or RIN veg_df %>% filter(str_sub(UserReference, 4, 6) %in% c("HGA", "LIL", "RIN")) The observers were anonymized and gender neutralized by using Gravity Falls characters. This is not needed anymore, so we can use the real names. data %>% mutate(Observer = str_replace_all(Observer, c("Dipper Pines" = "Hans Van Calster", "Wendy" = "Emma Cartuyvels", "Ford Pines" = "Dirk Maes", "Mabel Pines" = "Raïsa Carmen", "Robbie Valentino" = "Damiano Oldoni", "Gravity Falls" = "INBO coding club core team", "Grunkle Stan" = "Joost Vanoverbeke"))) %>% View() ``` # 1 All Q2Description and all Name values contain the string "31xx_". veg_df_21 <- veg_df %>% mutate(Q2Description = str_remove(Q2Description, "31xx_"), Name = str_remove(Name, "HT31xx_")) # 2 Select rows where UserReference contains HGA, LIL or RIN. Hint: combine dplyr and stringr. veg_df %>% filter(grepl("HGA|LIL|RIN", UserReference)) # 3 Change values in column Observer as follows: veg_df_2a3 <- veg_df %>% mutate(Observer = recode(Observer, "Dipper Pines" = "Hans Van Calster", "Wendy" = "Emma Cartuyvels", "Dipper Pines & Ford Pines" = "Hans Van Calster & Dirk Maes", "Dipper Pines & Wendy" = "Hans Van Calster & Emma Cartuyvels", "Dipper Pines & Mabel Pines" = "Hans Van Calster & Raïsa Carmen", "Dipper Pines & Robbie Valentino" = "Hans Van Calster & Damiano Oldoni", "Dipper Pines, Wendy, Grunkle Stan & Robbie Valentino" = "Hans Van Calster, Emma Cartuyvels, Joost Vanoverbeke & Damiano Oldoni", "Dipper Pines, Wendy & Mabel Pines" = "Hans Van Calster, Emma Cartuyvels & An Leyssen", "Gravity Falls" = "INBO coding club core team" )) ``` ``` #Joost, 2B extract_df <- data %>% filter(Q2Code == "Diept" | Q3Code == "Secch") %>% select(-c(Name, QualifierType, ParentID, QualifierResource, starts_with("Q1"))) %>% mutate(value = str_replace(Elucidation, ",", ".") %>% str_remove("max=") %>% str_squish() %>% as.numeric()) %>% select(-Elucidation) %>% mutate(NotSure = as.logical(NotSure)) %>% slice_max(value, n = 5) ``` ## INTERMEZZO Please, copy paste this in your script: ```r extract_df <- veg_df %>% filter(Q2Code == "Diept" | Q3Code == "Secch") %>% select(-c(Name, QualifierType, starts_with("Q1"), ParentID, QualifierResource)) %>% mutate(value = str_replace(Elucidation, ",", ".")) %>% mutate(value = str_remove(value, "max= ")) %>% mutate(value = as.numeric(value)) %>% select(-Elucidation) %>% mutate(NotSure = as.logical(.data$NotSure)) View(extract_df) # Get the rows from `extract_df` with the 5 deepest measuring points extract_df %>% filter(Q2Code == "Diept") %>% slice_max(value, n = 5) ## INTERMEZZO # During the [last # session](https://inbo.github.io/coding-club/sessions/20220329_tidy_data.html#1) # we sproke about tidy data. This dataset is not tidy, and so `extract_df`: the # watercourse depth and the Secchi depth should be spread over two columns as # they are two different variables. Here the code to create a tidy version of # `extract_df` called `extract_df_tidy` and which we will use for the rest of # the challenge: extract_df_tidy <- extract_df %>% tidyr::pivot_wider(names_from = Q2Code, values_from = c(Q2Description, Q3Code, Q3Description, value, NotSure)) ``` ## Challenge 3A ``` ### 2&3, Joost extract_df_tidy %>% group_by(RecordingGivid) %>% summarise(NotSure = any(NotSure_Helde, NotSure_Diept), RelWaterClarity = mean(value_Helde/value_Diept)) %>% ungroup() %>% mutate(RelWaterClarity = if_else(NotSure, NA_real_, RelWaterClarity)) %>% View() ``` ### 1 ``` extract_df_tidy %>% select(RecordingGivid, UserReference, Observer, ends_with("_Helde"), ends_with("_Depth"), everything()) %>% view() ``` ``` extract_df_tidy %>% relocate(RecordingGivid, UserReference, Observer, c(ends_with("_Helde")), c(ends_with("_Diept"))) ``` ``` #RAÏSA #Change the column order of extract_df_tidy extract_df_tidy <- extract_df_tidy %>% dplyr::relocate(RecordingGivid, UserReference, Observer, ends_with("_Helde"), ends_with("Dept")) #Column RelWaterClarity extract_df_tidy %>% group_by(RecordingGivid) %>% summarize(RelWaterClarity = value_Helde/value_Diept) %>% ungroup() #Set RelWaterClarity as NA if unreliable extract_df_tidy %>% group_by(RecordingGivid) %>% summarize(RelWaterClarity = ifelse(NotSure_Helde | NotSure_Diept , NA, value_Helde/value_Diept)) %>% ungroup() #Create a column called datetime extract_df_tidy <- extract_df_tidy %>% mutate(datetime = lubridate::as_datetime( str_remove(RecordingGivid,"IV"), format = "%Y%m%d%H%M%S") ) #Create a summary for each observer extract_df_tidy %>% group_by(Observer) %>% summarize(n = n(), max_depth = max(value_Diept, na.rm = TRUE), min_secch_depth = min(value_Helde, na.rm = TRUE), first_rec = min(datetime), last_rec = max(datetime)) %>% arrange(desc(n), desc(last_rec)) ```