rm(list = ls())
setwd('~/🪅Master/12_internship/2023 Global Climate Action report/Raw_data_formatted/data_2023/CDP Cities')
library(tidyverse)
x <- list.dirs("..")[grepl("clean data", list.dirs(".."), ignore.case = T)]
get_dirs <- function(path){
# Takes a path to the folder containing the cleaned data folders and
# returns a vector that contains all the folders with clean data
x <- list.dirs("..")[grepl("clean data", list.dirs(".."), ignore.case = T)]
return(x)
}
path = ".."
directory <- get_dirs(path)
get_files <- function(directory, path){
# Iterates through the folders containing clean data and reads them into
# the Global Env.
# Appends _ datasource name to the end of the table name
for (x in directory){
ds <- gsub(paste0(path, "/(.*?)/.*"), "\\1", x)
for (y in list.files(x)){
assign(paste0(gsub(("(.*?)\\.csv"), "\\1", y), "_", ds), read.csv(paste0(x, "/", y)),
envir = .GlobalEnv)
}
}
}
get_files(directory, path)
combine_df <- function(table_name){
# Creates and combines a table based on the individual data source tables
tables <- ls(.GlobalEnv, pattern = paste0(table_name, "_"))
if (length(tables) == 0){
print(paste0("No tables found for ", table_name))
}
df <- mget(tables, envir = .GlobalEnv) %>%
lapply(., function (x) mutate(x, across(names(x), ~as.character(.x)))) %>%
bind_rows() %>%
distinct()
return(df)
}
# Prioritise data for targets/emissions
Target <- combine_df('Target')
Actor <- combine_df('Actor')
Territory <- combine_df('Territory')
Target <- combine_df('Target') # 12591 obs. of 16 variables
Actor <- combine_df('Actor') # 18655 obs. of 11 variables
Territory <- combine_df('Territory') # 18353 obs. of 10 variables
Target_i <- Target %>%
select(target_id, actor_id, target_type, target_year, target_unit, target_value, baseline_value, baseline_year, net.emiss, baseline_unit, summary) # 12591 obs. of 11 variables
Target_I <- distinct(Target_i, target_id, .keep_all = TRUE) # 12157 obs. of 11 variables
Actor_I <- Actor %>%
select(actor_id, name, iso) # 18655 obs. of 3 variables
Territory_I <- Territory %>%
select(actor_id, area, lat, lng, City.Location, before) # 18353 obs. of 6 variables
#actor_id, target_type, target_year, target_unit, target_value, baseline_value, baseline_year, net.emiss, baseline_unit, summary by target_id in Target
#type, name, iso in Actor, by actor_id
#area, lat, lng, City.location, before in Territory, by actor_id
Target_a <- left_join(Target_I, Actor_I, by = "actor_id") # 15155 obs. of 13 variables
# indicating 2998 repetitive actor_id in Actor dataset
Target_b <- left_join(Target_a, Territory_I, by = "actor_id") # 23436 obs. of 18 variables
# indicating 5083 repetitive actor_id in Territory dataset
Target_Final <- distinct(Target_b, target_id, .keep_all = TRUE) # 12157 obs. of 18 variables
Target_F_2023 <- subset(Target_Final, target_year >= 2023)
write.csv(Target_F_2023, file = "target_2023.csv")
ta2022 <- read.csv('target_actor2022.csv')
ta2023 <- read.csv('target_actor2023.csv')
ta2022_n <- ta2022 %>%
subset(target_year >= 2022)
ta2023_n <- ta2023 %>%
subset(target_year >= 2023)
ta_ant <- anti_join(ta2022_n, ta2023_n, by = 'target_id')
ta_ant
ta2023_nn <- bind_rows(ta2023_n, ta_ant)
ta2023_nn # 5314 obs of 21 variables
write.csv(ta2023_nn, file = 'ta2023.csv')
## find the data that exist in JS's analysis but not in the 'target_actor2023.csv' file
A_comp <- anti_join(Target_F_2023, ta2023_nn, by = 'target_id') # 2012 obs. of 18 variables
write.csv(A_comp, file = 'comp2023.csv')
When run, the script create the random forest model and results for the given dataset.
Mar 14, 2024–
Mar 13, 2024For this lab on logistic regression, you will work through each of the chunks and respond to the questions. You should knit the lab to html (not Word). Please submit your knitted html and Rmd to Sakai by noon on November 21st.
Nov 9, 2023This tutorial will walk you through the steps of running a simple linear regression (SLR) in R. I encourage you to read about Yale’s Environmental Performance Index (EPI) here: https://epi.envirocenter.yale.edu/.
Oct 4, 2023or
By clicking below, you agree to our terms of service.
New to HackMD? Sign up