ENV710 Final - HackMD

# ENV710 Final | Response Variable | Unit | Description | | --------------------- | ---- | ------------------------------------------------ | | deforestation.percent | % | Percent of forest lost from 2010-2021 by country | |**Explanatory Variables**||**Description**| |solar.capacity|%|Installed renewable electricity capacity (MW) change ration between 2010 and 2020| |netzero.rc|dummy|Does a nation have a net zero target? (Net Zero, Zero Carbon, Carbon Neutral, Climate Neutral, Emission Reduction Target, Reduction) Net zero target = 1, No target = 0| |rural_change|%|rural population / total population change ration between 2010 and 2020 | |arable_change|%|Arable land (hectares per person) change ration between 2010 and 2020 | |gdppc_change|%|GDP per capita in US dollars change ration between 2010 and 2020| |regime|category|0 = Closed autocracy, 1-3 = Electoral autocracy, 4-6 = Electoral democracy, 7-9 = Liberal democracy| |agvalue_change|%|Agriculture, forestry, and fishing, value added (% of GDP) change ration between 2010 and 2020 | |protectedrea_change|%|Terrestrial and marine protected areas, % of total territorial area change ration between 2010 and 2020 | |schoolyear |year|School life expectancy, primary and lower secondary, both sexes (years) in 2015.|  --- title: "Final Project JJS" author: "Simon Sharp, Jon Ekberg, Jia Shen Tsai" output: html_document: df_print: paged --- ```{r environment setup} rm(list=ls()) options(scipen = 999) library(moments) library(knitr) library(tidyverse) library(gt) library(broom) library(scales) library(readr) library(dplyr) library(GGally) library(lmtest) library(car) library(wbstats) library(sjPlot) library(sjmisc) library(sjlabelled) library(olsrr) forest.df <- read_csv("deforestation.csv") forest.s <- forest.df %>% dplyr::select("Country Name", "Country ISO","Region", "Income Category", "Sum of Tree Cover Loss 2010-2021 (%)") %>% rename(deforestation.percent = "Sum of Tree Cover Loss 2010-2021 (%)") head(forest.s) na <- is.na(forest.s$deforestation.percent) ``` ```{r pledge data wrangling} nz.df <- read_csv("NetZeroCountries.csv") nz_s.df <- nz.df %>% dplyr::filter(actor_type == "Country") %>% select(country, end_target) nz_s.df$netzero.rc <- ifelse(nz_s.df$end_target == c("No target", "Other"), 0, 1) ``` ```{r join the pledge data} df1 <- left_join(forest.s, nz_s.df, by = c("Country ISO" = "country")) df1$netzero.rc[is.na(df1$netzero.rc)] <- 0 ``` ```{r} democracy <- read.csv("democracy.csv") dem.df <- democracy %>% dplyr::filter(Year == "2021") %>% rename(regime = regime_amb_row_owid) %>% select(Code, regime) df2 <- left_join(df1, dem.df, by = c("Country ISO" = "Code")) ``` ```{r rural & arable land} rural.df <- read.csv("rural_change.csv") rural.df <- rural.df %>% select(Country.Code,"rural_change") arable.df <- read.csv("arable_change.csv") arable.df <- arable.df %>% select(Country.Code,"arable_change") gdppc.df <- read.csv("gdppc_change.csv") gdppc.df <- gdppc.df %>% select(Country.Code,"gdppc_change") protectedarea.df <- read.csv("protectedarea_change.csv") protectedarea.df <- protectedarea.df %>% select(Country.Code,"protectedarea_change") ag.value.df <- read.csv("agvalue_change.csv") ag.value.df <- ag.value.df %>% select(Country.Code,agvalue.change) schoolyear.df <- read.csv("schoolyear.csv") schoolyear.df <- schoolyear.df %>% select(CountryCode, "X2015")%>% rename(schoolyear = "X2015") wb1 <- left_join(rural.df,arable.df, by = c("Country.Code" = "Country.Code")) wb2 <- left_join(wb1,gdppc.df, by = c("Country.Code" = "Country.Code")) wb3 <- left_join(wb2, protectedarea.df, by = c("Country.Code" = "Country.Code")) wb4 <- left_join(wb3, ag.value.df, by = c("Country.Code" = "Country.Code")) wb5 <- left_join(wb4, schoolyear.df, by = c("Country.Code" = "CountryCode")) wb6 <- dplyr::select(wb5, "Country.Code", rural_change, arable_change, gdppc_change, protectedarea_change, agvalue.change,schoolyear) df3 <- left_join(df2, wb6, by = c("Country ISO" = "Country.Code")) ``` ```{r read data} livestock.df <- read_csv("livestock.csv") livestock.s <- livestock.df %>% select("Country Code","livestock.change.2010-2020")%>% rename(livestock_change = "livestock.change.2010-2020") df4 <- left_join(df3, livestock.s, by = c("Country ISO" = "Country Code")) corruption.df <- read_csv("corruption.csv") corruption.s <- corruption.df %>% select("ISO3","CPI score 2021")%>% rename(corruption.index = "CPI score 2021") df5 <- left_join(df4, corruption.s, by = c("Country ISO" = "ISO3")) ``` ```{r} solar.cap <- read.csv("Solar Percent Change (2010-2021).csv") solar.cap.s <- solar.cap %>% select(ISO, "Solar.Percent.Change..from.2010.to.2021.") %>% rename("solar.capacity" = "Solar.Percent.Change..from.2010.to.2021.") df6 <- left_join(df5, solar.cap.s, by = c("Country ISO" = "ISO")) df6$solar.capacity <- as.numeric(df6$solar.capacity) df6 <- rename(df6, income.category = "Income Category") ``` ```{r lowincome subset} lowerincome.df <- df6%>% dplyr::filter(income.category == "Lower middle income" | income.category == "Low income") ``` ## summary ```{r} summ.col <- c(1,5,17,7,8,9,10,11,12,13,14,15,16) summary.df <- dplyr::select(df6, summ.col) tdf <- data.frame(t(summary.df[-1])) v <- as.list(summary.df$`Country Name`) colnames(tdf) <- v head(tdf) tdf <- tdf %>% rownames_to_column("Variables") tdf <- gather(tdf,"country","value",-Variables) head(tdf) summary.table <- tdf %>% group_by(Variables)%>% dplyr::summarize( obs=length(value[!is.na(value)]), max=round(max(value, na.rm=TRUE),3), min=round(min(value, na.rm=TRUE),3), mean=round(mean(value, na.rm=TRUE),3), median=round(median(value, na.rm=TRUE),3), sd=round(sd(value, na.rm=TRUE),3), skew=round(skewness(value, na.rm = TRUE),3) ) summary.table <- summary.table[c(4,12,5,10,2,1,6,8,3,7,11),] summary.table ``` ```{r see missing country} missing.bycountry<-tdf %>% group_by(country) %>% summarise(sumNA.value = sum(is.na(value))) %>% filter(sumNA.value > 0) missing.bycountry missing.all<-tdf %>% group_by(country,Variables) %>% summarise(sumNA.value = sum(is.na(value))) %>% filter(sumNA.value > 0) missing.all ``` ```{r summary table to gt} summary.table %>% gt() %>% tab_header( title = md("Summary Statistics of Variables, All Countries")) %>% fmt_passthrough (columns=c(`Variables`)) %>% fmt_number(columns = c(obs), decimals = 0) %>% fmt_number(columns = c(max), decimals = 2) %>% fmt_number(columns = c(min), decimals = 2) %>% fmt_number(columns = c(mean), decimals = 2) %>% fmt_number(columns = c(median), decimals = 2) %>% fmt_number(columns = c(sd), decimals = 2) %>% fmt_number(columns = c(skew), decimals = 2)%>% cols_label( Variables="Variable", obs = "Observations", max = "Maximum", min = "Minimum", mean = "Mean", median = "Median", sd = "SD", skew = "Skewness") ``` ```{r lowincome summary} low.summary.df <- dplyr::select(lowerincome.df, summ.col) low.tdf <- data.frame(t(low.summary.df[-1])) low.v <- as.list(low.summary.df$`Country Name`) colnames(low.tdf) <- low.v low.tdf <- low.tdf %>% rownames_to_column("Variables") low.tdf <- gather(low.tdf,"country","value",-Variables) low.summary.table <- low.tdf %>% group_by(Variables)%>% dplyr::summarize( obs=length(value[!is.na(value)]), max=round(max(value, na.rm=TRUE),3), min=round(min(value, na.rm=TRUE),3), mean=round(mean(value, na.rm=TRUE),3), median=round(median(value, na.rm=TRUE),3), sd=round(sd(value, na.rm=TRUE),3), skew=round(skewness(value, na.rm = TRUE),3) ) low.summary.table <- low.summary.table[c(4,12,5,10,2,1,6,8,3,7,11),] low.summary.table %>% gt() %>% tab_header( title = md("Summary Statistics of Variables, Lower-Income Countries")) %>% fmt_passthrough (columns=c(`Variables`)) %>% fmt_number(columns = c(obs), decimals = 0) %>% fmt_number(columns = c(max), decimals = 2) %>% fmt_number(columns = c(min), decimals = 2) %>% fmt_number(columns = c(mean), decimals = 2) %>% fmt_number(columns = c(median), decimals = 2) %>% fmt_number(columns = c(sd), decimals = 2) %>% fmt_number(columns = c(skew), decimals = 2)%>% cols_label( Variables="Variable", obs = "Observations", max = "Maximum", min = "Minimum", mean = "Mean", median = "Median", sd = "SD", skew = "Skewness") ``` ```{r missing lower income} l.missing.bycountry <- low.tdf %>% group_by(country) %>% summarise(sumNA.value = sum(is.na(value))) %>% filter(sumNA.value > 0) l.missing.bycountry ``` ## model ```{r warning= FALSE, message = FALSE} s1 <- c(17,7,8,9,10,11,12,13,14,15,16,5) all.model1 <- dplyr::select(df6, s1) ggpairs(all.model1, columns = 1:12, title = "All Area Original Scatterplot Matrix", axisLabels = "show") lowerincome.model1 <- dplyr::select(lowerincome.df, s1) ggpairs(lowerincome.model1, columns = 1:12, title = "Lower Income Countries Original Scatterplot Matrix", axisLabels = "show") ``` ```{r log solar capacity, warning= FALSE, message = FALSE} df6 <- df6 %>% mutate(log.solar.capacity = log(solar.capacity)) lowerincome.df <- lowerincome.df %>% mutate(log.solar.capacity = log(solar.capacity)) s2 <- c(18,7,8,9,10,11,12,13,14,15,16,5) all.model1 <- dplyr::select(df6, s2) ggpairs(all.model1, columns = 1:12, title = "All Area Log-Transformed Scatterplot Matrix", axisLabels = "show") lowerincome.model1 <- dplyr::select(lowerincome.df, s2) ggpairs(lowerincome.model1, columns = 1:12, title = "Lower Income Countries Log-Transformed Scatterplot Matrix", axisLabels = "show") ``` ```{r model0} v0 <- deforestation.percent ~ log.solar.capacity + gdppc_change + rural_change + arable_change + agvalue.change + livestock_change + protectedarea_change + corruption.index + netzero.rc + regime + schoolyear all.m0 <- lm(v0, data=df6, na.action = na.exclude) lowerincome.m0 <- lm(v0, data=lowerincome.df, na.action = na.exclude) summary(all.m0) summary(lowerincome.m0) ``` ```{r} v1 <- deforestation.percent ~ solar.capacity + gdppc_change + rural_change + arable_change + agvalue.change + livestock_change + protectedarea_change + corruption.index + schoolyear all.m1 <- lm(v1, data=df6, na.action = na.exclude) lowerincome.m1 <- lm(v1, data=lowerincome.df, na.action = na.exclude) summary(all.m1) summary(lowerincome.m1) ``` ``` {r} plot(all.m1,1) plot(lowerincome.m1,1) v.all.m1 <- vif(all.m1) v.lowerinome.m1 <- vif(lowerincome.m1) ``` ```{r log in the model} v1.log <- deforestation.percent ~ log.solar.capacity + gdppc_change + rural_change + arable_change + agvalue.change + livestock_change + protectedarea_change + corruption.index + schoolyear m1.log <- lm(v1.log, data=df6, na.action = na.exclude) lowerincome.m1.log <- lm(v1.log, data = lowerincome.df, na.action = na.exclude) summary(m1.log) summary(lowerincome.m1.log) ``` ```{r} plot(m1.log,1) plot(lowerincome.m1.log,1) v.m1.log <- vif(m1.log) v.lowerincome.m1 <- vif(lowerincome.m1) ``` ```{r} ols_test_normality(m1.log) ``` ### tables ```{r draw table for outset} tab_model(all.m0,lowerincome.m0, dv.labels = c("All Countries, Original", "Lower-Income Countries, Original"), show.se = TRUE, CSS = list(css.table = '+font-family: Arial;')) ``` ```{r draw table} tab_model(all.m1,lowerincome.m1,m1.log,lowerincome.m1.log, dv.labels = c("All Countries, Political Variables Removed", "Lower-Income Countries, Political Variables Removed","All Countries, Solar Logged","Lower-Income Countries, Solar Logged"), show.se = TRUE, CSS = list(css.table = '+font-family: Arial;')) ``` ```{r vif table} vif_df <- bind_cols(v.all.m1, v.lowerincome.m1) colnames(vif_df) <- c("Model 1: All Countries, Political Variables Removed","Model 2: Lower-Income Countries, Political Variables Removed") cn <- c("solar.capacity", "gdppc_change", "rural_change", "arable_change", "agvalue.change", "livestock_change", "protectedarea_change", "corruption.index", "schoolyear") vif_df <- bind_cols(vif_df, Variables = cn) vif_df <- vif_df[,c(3,1,2)] vif_df vif_log_df <- bind_cols(v.m1.log, v.lowerincome.m1) colnames(vif_log_df) <- c("Model 3: All Countries, Solar Logged","Model 4: Lower-Income Countries, Solar Logged") vif_log_df cn2 <- c("log.solar.capacity", "gdppc_change", "rural_change", "arable_change", "agvalue.change", "livestock_change", "protectedarea_change", "corruption.index", "schoolyear") vif_log_df <- bind_cols(vif_log_df, Variables = cn2) vif_log_df <- vif_log_df[,c(3,1,2)] super_vif_df <- full_join(vif_df,vif_log_df, by = "Variables") super_vif_df super_vif_df %>% gt() %>% tab_options( table.width = pct(80)) %>% cols_width( everything() ~ px(100)) %>% tab_header( title = md("VIF Test for All Models")) ``` ```{r bp test} bptest(all.m1) bptest(lowerincome.m1) bptest(m1.log) bptest(lowerincome.m1.log) bp_table <- data.frame( Model = c("Model 1: All Countries, Political Variables Removed","Model 2: Lower-Income Countries, Political Variables Removed","Model 3: All Countries, solar Logged","Model 4: Lower-Income Countries, Solar Logged"), BP = c(8.28,3.97,9.66,3.91), df = c(9,9,9,9), "p-value" = c(0.51,0.92,0.38,0.92) ) bp_table %>% gt() %>% tab_options( table.width = pct(80)) %>% cols_width( everything() ~ px(80)) %>% tab_header( title = md("Breusch-Pagan Test for All Models")) ``` ```{r AIC comparison, warning= FALSE, message = FALSE} AIC_cp <- AIC(all.m1,lowerincome.m1,m1.log,lowerincome.m1.log) AIC_cp$model <- c("Model1: All Countries, Political Variables Removed", "Model2:Lower-Income Countries, Political Variables Removed","Model3: All Countries, Solar Logged","Model4: Lower-Income Countries, Solar Logged") AIC_cp AIC_cp <- AIC_cp[,c(3,1,2)] ``` ```{r AIC comparison table, warning= FALSE, message = FALSE} AIC_cp %>% gt() %>% tab_options( table.width = pct(80)) %>% cols_width( everything() ~ px(80)) %>% tab_header( title = md("AIC Comparison")) %>% fmt_number(columns = c(df), decimals = 0) %>% fmt_number(columns = c(AIC), decimals = 2)%>% cols_label( model = "Model") ``` ### tropical ```{r tropical subset} tropical <- read.csv("tropical.csv") df7 <- df6 %>% left_join(tropical, by = c("Country Name"= "country")) %>% dplyr::filter(fullyTropical == "True" | fullyTropical == "false") ``` ```{r warning= FALSE, message = FALSE} tropicl.model1 <- dplyr::select(df7, s2) ggpairs(tropicl.model1, columns = 1:12, title = "Tropical Area Original Scatterplot Matrix", axisLabels = "show") ``` ```{r} tropical.m1 <- lm(v1.log, data=df7, na.action = na.exclude) summary(tropical.m1) ``` ```{r} plot(tropical.m1,1) vif(tropical.m1) ``` ```{r draw table option2} tab_model(all.m1,lowerincome.m1,m1.log,lowerincome.m1.log,tropical.m1, dv.labels = c("All Countries, Original", "Lower-Income Countries, Original","All Countries, Logged","Lower-Income Countries, Logged","Tropical Countries, Logged"), show.se = TRUE, CSS = list(css.table = '+font-family: Arial;')) ``` ### abandoned models ```{r pull regime out} v2 <- deforestation.percent ~ solar.capacity + gdppc_change + rural_change + arable_change + agvalue.change + livestock_change + protectedarea_change + corruption.index + netzero.rc + schoolyear all.m2 <- lm(v2, data=df6, na.action = na.exclude) lowerincome.m2 <- lm(v2, data=lowerincome.df, na.action = na.exclude) summary(all.m2) summary(lowerincome.m2) ``` ```{r assumption m2} plot(all.m2,1) plot(lowerincome.m2,1) plot(lowerincome.m2,4) bptest(lowerincome.m2) vif(lowerincome.m2) ``` ```{r pull rural population and arable area out} v3 <- deforestation.percent ~ solar.capacity + gdppc_change + agvalue.change + livestock_change + protectedarea_change + corruption.index + netzero.rc + regime + schoolyear all.m3 <- lm(v3, data=df6, na.action = na.exclude) lowerincome.m3 <- lm(v3, data=lowerincome.df, na.action = na.exclude) summary(all.m3) summary(lowerincome.m3) ``` ```{r interaction} df6 <- df6 %>% mutate(lvsk.corrup = livestock_change*corruption.index) lowerincome.df <- lowerincome.df %>% mutate(lvsk.corrup = livestock_change*corruption.index) v4 <- deforestation.percent ~ solar.capacity + gdppc_change + agvalue.change + protectedarea_change + livestock_change + corruption.index + lvsk.corrup + netzero.rc + regime + schoolyear all.m4 <- lm(v4, data=df6, na.action = na.exclude) lowerincome.m4 <- lm(v4, data=lowerincome.df, na.action = na.exclude) summary(all.m4) summary(lowerincome.m4) ``` ```{r, warning=FALSE} tab_model(all.m1,all.m2,all.m3,all.m4, dv.labels = c("All","Without Regime", "Without arable & rural.pop", "lvsk corruption interaction"), vcov.fun = "HC2", show.se = TRUE, CSS = list(css.table = '+font-family: Arial;')) tab_model(lowerincome.m1,lowerincome.m2,lowerincome.m3,lowerincome.m4, dv.labels = c("All","Without Regime", "Without arable & rural.pop", "lvsk corruption interaction"), show.se = TRUE, CSS = list(css.table = '+font-family: Arial;')) ```

Syntax	Example	Reference
# Header	Header	基本排版
- Unordered List	Unordered List
1. Ordered List	Ordered List
- [ ] Todo List	Todo List
> Blockquote	Blockquote
Bold font	Bold font
Italics font	Italics font
~~Strikethrough~~	~~Strikethrough~~
19^th^	19^th
H~2~O	H₂O
++Inserted text++	Inserted text
==Marked text==	Marked text
[link text](https:// "title")	Link
![image alt](https:// "title")	Image
`Code`	`Code`	在筆記中貼入程式碼
```javascript var i = 0; ```	`var i = 0;`
:smile:		Emoji list
{%youtube youtube_id %}	Externals
$L^aT_eX$	L^aT_eX
:::info This is a alert area. :::	This is a alert area.