# Figure 1
### Packages
library(tidyverse) #for most of the important functions
library(gt) #making tables
library(janitor) #wrangling
library(patchwork) #plots
library(dplyr)
library(data.table) #renaming
### Cleaning up data
# Read the survey export, keep the columns used in the participant table and
# replace every numeric code with its display label. A code that is not listed
# in its case_when() (including a missing value) becomes NA.
data1 <- read_csv("Impact of believing you've had covid-19 - dataset.csv", show_col_types = FALSE)
data1 <- data1 %>%
  rename(think_covid = Ever_covid,
         Age = age_categories,
         Gender = gender) %>%
  select(think_covid, Age, Gender, degree, Has_child, Working, Key_worker, region) %>%
  mutate(Gender = case_when(Gender == 1 ~ "Male",
                            Gender == 2 ~ "Female")) %>%
  mutate(think_covid = case_when(think_covid == 1 ~ "Have COVID-19",
                                 think_covid == 0 ~ "Have Not Had COVID-19")) %>%
  # The degree label is shown verbatim in the table, so its parenthesis must be
  # balanced.
  mutate(degree = case_when(degree == 0 ~ "GSCE/vocational/A-level/No formal qualifications",
                            degree == 1 ~ "Degree or higher (Bachelors, Masters, PhD)")) %>%
  mutate(Age = case_when(Age == 1 ~ "18 to 24 years",
                         Age == 2 ~ "25 to 34 years",
                         Age == 3 ~ "35 to 44 years",
                         Age == 4 ~ "45 to 54 years",
                         Age == 5 ~ "55+")) %>%
  mutate(Has_child = case_when(Has_child == 0 ~ "No",
                               Has_child == 1 ~ "Yes")) %>%
  mutate(Working = case_when(Working == 0 ~ "Not Working",
                             Working == 1 ~ "Working")) %>%
  mutate(Key_worker = case_when(Key_worker == 0 ~ "No",
                                Key_worker == 1 ~ "Yes")) %>%
  mutate(region = case_when(region == 1 ~ "Midlands",
                            region == 2 ~ "South & East",
                            region == 3 ~ "North",
                            region == 4 ~ "London",
                            region == 5 ~ "Wales, Scotland and Northern Ireland"))
### Data frames
# For every characteristic: cross-tabulate it against think_covid with tabyl(),
# wrap the counts in a gt table, and read the tabulated data back out of the gt
# object so the pieces can be stacked in the combining step.
# NOTE(review): `_data` does not appear to be a documented gt accessor -
# confirm it is stable across gt versions.

# participants by gender
# tabyl() orders its rows by the values of Gender, not by input row order, so
# sorting the rows before tabulating would not change the result.
datagender <- data1 %>%
  select(Gender, think_covid)
tgender <- datagender %>%
  tabyl(Gender, think_covid)
gtgender <- gt(tgender)
datagender <- gtgender$`_data`

# participants by age
data_Age <- data1 %>%
  select(Age, think_covid)
tage <- data_Age %>%
  tabyl(Age, think_covid)
gtage <- gt(tage)
dataage <- gtage$`_data`

# by child (respondents with a missing answer are excluded)
datachild <- data1 %>%
  select(Has_child, think_covid) %>%
  filter(!is.na(Has_child))
tchild <- datachild %>%
  tabyl(Has_child, think_covid)
gtchild <- gt(tchild)
datachild <- gtchild$`_data`

# employment status (respondents with a missing answer are excluded)
dataworking <- data1 %>%
  select(Working, think_covid) %>%
  filter(!is.na(Working))
tworking <- dataworking %>%
  tabyl(Working, think_covid)
gtworking <- gt(tworking)
dataworking <- gtworking$`_data`

# working key sector
tkey <- data1 %>%
  tabyl(Key_worker, think_covid)
gtkey <- gt(tkey)
datakey <- gtkey$`_data`

# highest educational or professional qualification
# (respondents with a missing answer are excluded)
datadegree <- data1 %>%
  select(degree, think_covid) %>%
  filter(!is.na(degree))
tdegree <- datadegree %>%
  tabyl(degree, think_covid)
gtdegree <- gt(tdegree)
datadegree <- gtdegree$`_data`

# region
# The pipeline must end at tabyl(); a trailing pipe here would swallow the
# following assignment and stop the script.
tregion <- data1 %>%
  tabyl(region, think_covid)
gtregion <- gt(tregion)
dataregion <- gtregion$`_data`
### percentages
# Row-wise percentages: for each level of a characteristic, the share of
# respondents falling in each think_covid category. Each per* object is the
# count table and the matching p* object is derived from it.

# gender
perGender <- table(Gender = data1$Gender, think_covid = data1$think_covid)
pGender <- 100 * prop.table(perGender, margin = 1)

# age
perAge <- table(Age = data1$Age, think_covid = data1$think_covid)
pAge <- 100 * prop.table(perAge, margin = 1)

# has a child
perChild <- table(Has_child = data1$Has_child, think_covid = data1$think_covid)
pChild <- 100 * prop.table(perChild, margin = 1)

# employment status
perEmp <- table(Working = data1$Working, think_covid = data1$think_covid)
pEmp <- 100 * prop.table(perEmp, margin = 1)

# key worker
perKey <- table(Key_worker = data1$Key_worker, think_covid = data1$think_covid)
pKey <- 100 * prop.table(perKey, margin = 1)

# education
perEdu <- table(degree = data1$degree, think_covid = data1$think_covid)
pEdu <- 100 * prop.table(perEdu, margin = 1)

# region
perRegion <- table(region = data1$region, think_covid = data1$think_covid)
pRegion <- 100 * prop.table(perRegion, margin = 1)
### combining data
# Stack the count tables and the percentage tables and put them side by side.
# cbind() pairs rows purely by position, so both lists MUST list the
# characteristics in the same order: gender, age, degree, child, employment,
# key worker, region.
listfull <- list(datagender,
                 dataage,
                 datadegree,
                 datachild,
                 dataworking,
                 datakey,
                 dataregion)
listpercent1 <- list(pGender, pAge, pEdu, pChild, pEmp, pKey, pRegion)

# One row per level, columns named after the think_covid categories.
ptibble <- do.call(rbind, listpercent1)
ptibble <- as_tibble(ptibble)
# Rename the percentage columns so they do not clash with the count columns
# of the same name once the two tables are bound together.
ptibble1 <- ptibble %>%
  rename("Think COVID" = "Have COVID-19",
         "Think no COVID" = "Have Not Had COVID-19")
gtptib <- gt(ptibble1)
ptibble2 <- gtptib$`_data`

# use.names = FALSE stacks by column position; the first column of each count
# table has a different name (Gender, Age, ...).
fulltibble <- rbindlist(listfull, use.names = FALSE)

# Fail loudly instead of recycling or mis-pairing rows if the two halves
# disagree in length.
stopifnot(nrow(fulltibble) == nrow(ptibble2))
fulltibble1 <- cbind(fulltibble, ptibble2)
### creating gt table
# Build the display table from fulltibble1 (level name, two count columns, two
# percentage columns). Row groups are assigned by row position, so they depend
# on the order in which the per-characteristic tables were stacked.
gtfull1 <- gt(fulltibble1) %>%
  tab_spanner(
    label = "Had COVID-19",
    columns = c(2:3)
  ) %>%
  # The first column is still called Gender; relabel it for display.
  cols_label(
    Gender = "Level",
    "Have COVID-19" = "Think have had COVID-19 n = 1493 n (%)",
    "Have Not Had COVID-19" = "Think have not had COVID-19 n = 4656 n (%)"
  ) %>%
  tab_row_group(
    label = "Gender",
    rows = 1:2
  ) %>%
  tab_row_group(
    label = "Age",
    rows = 3:7
  ) %>%
  tab_row_group(
    label = "Highest educational or professional qualification",
    rows = 8:9
  ) %>%
  tab_row_group(
    label = "Have a child",
    rows = 10:11
  ) %>%
  tab_row_group(
    label = "Employment status",
    rows = 12:13
  ) %>%
  tab_row_group(
    label = "Working in key sector",
    rows = 14:15
  ) %>%
  tab_row_group(
    label = "Region",
    rows = 16:20
  ) %>%
  # Display order of the groups (independent of the row positions above).
  row_group_order(groups = c("Gender",
                             "Age",
                             "Have a child",
                             "Employment status",
                             "Working in key sector",
                             "Highest educational or professional qualification",
                             "Region")) %>%
  # gt_merge_stack() lives in gtExtras, which is not attached at this point of
  # the script, so it is called through its namespace. Each call stacks the
  # percentage column under the matching count column.
  gtExtras::gt_merge_stack(col1 = "Have COVID-19", col2 = "Think COVID") %>%
  gtExtras::gt_merge_stack(col1 = "Have Not Had COVID-19", col2 = "Think no COVID")
# Figure 1
### Packages
library(tidyverse) #for most of the important functions
library(gt) #making tables
library(janitor) #wrangling
library(patchwork) #plots
library(dplyr)
library(data.table) #renaming
library(gtExtras)
### Cleaning up data
# Load the survey export, keep the columns needed for the participant table,
# and translate each numeric code into its display label. Codes that are not
# listed (including missing values) come out as NA.
data1 <- read_csv("Impact of believing you've had covid-19 - dataset.csv", show_col_types = FALSE)
data1 <- data1 %>%
  rename(
    think_covid = Ever_covid,
    Age = age_categories,
    Gender = gender
  ) %>%
  select(think_covid, Age, Gender, degree, Has_child, Working, Key_worker, region) %>%
  # Every recode reads only its own column, so they can share one mutate().
  mutate(
    Gender = case_when(
      Gender == 1 ~ "Male",
      Gender == 2 ~ "Female"
    ),
    think_covid = case_when(
      think_covid == 1 ~ "Have COVID-19",
      think_covid == 0 ~ "Have Not Had COVID-19"
    ),
    degree = case_when(
      degree == 0 ~ "GSCE/vocational/A-level/No formal qualifications",
      degree == 1 ~ "Degree or higher (Bachelors, Masters, PhD)"
    ),
    Age = case_when(
      Age == 1 ~ "18 to 24 years",
      Age == 2 ~ "25 to 34 years",
      Age == 3 ~ "35 to 44 years",
      Age == 4 ~ "45 to 54 years",
      Age == 5 ~ "55+"
    ),
    Has_child = case_when(
      Has_child == 0 ~ "No",
      Has_child == 1 ~ "Yes"
    ),
    Working = case_when(
      Working == 0 ~ "Not Working",
      Working == 1 ~ "Working"
    ),
    Key_worker = case_when(
      Key_worker == 0 ~ "No",
      Key_worker == 1 ~ "Yes"
    ),
    region = case_when(
      region == 1 ~ "Midlands",
      region == 2 ~ "South & East",
      region == 3 ~ "North",
      region == 4 ~ "London",
      region == 5 ~ "Wales, Scotland and Northern Ireland"
    )
  )
### Data frames
# For every characteristic: cross-tabulate it against think_covid with tabyl(),
# wrap the counts in a gt table, and read the tabulated data back out of the gt
# object. Gender, degree and region are then re-ordered into the order in which
# they should appear in the final table.
# NOTE(review): `_data` does not appear to be a documented gt accessor -
# confirm it is stable across gt versions.

# participants by gender
datagender2 <- data1 %>%
  select(Gender, think_covid)
tgender <- datagender2 %>%
  tabyl(Gender, think_covid)
gtgender <- gt(tgender)
datagender <- gtgender$`_data` %>%
  arrange(factor(Gender, levels = c('Male','Female')))

# participants by age
data_Age <- data1 %>%
  select(Age, think_covid)
tage <- data_Age %>%
  tabyl(Age, think_covid)
gtage <- gt(tage)
dataage <- gtage$`_data`

# by child (respondents with a missing answer are excluded)
datachild <- data1 %>%
  select(Has_child, think_covid) %>%
  filter(!is.na(Has_child))
tchild <- datachild %>%
  tabyl(Has_child, think_covid)
gtchild <- gt(tchild)
datachild <- gtchild$`_data`

# employment status (respondents with a missing answer are excluded)
dataworking <- data1 %>%
  select(Working, think_covid) %>%
  filter(!is.na(Working))
tworking <- dataworking %>%
  tabyl(Working, think_covid)
gtworking <- gt(tworking)
dataworking <- gtworking$`_data`

# working key sector
tkey <- data1 %>%
  tabyl(Key_worker, think_covid)
gtkey <- gt(tkey)
datakey <- gtkey$`_data`

# highest educational or professional qualification
# (respondents with a missing answer are excluded)
datadegree <- data1 %>%
  select(degree, think_covid) %>%
  filter(!is.na(degree))
tdegree <- datadegree %>%
  tabyl(degree, think_covid)
gtdegree <- gt(tdegree)
datadegree <- gtdegree$`_data` %>%
  arrange(factor(degree, levels = c('GSCE/vocational/A-level/No formal qualifications','Degree or higher (Bachelors, Masters, PhD)')))

# region
tregion <- data1 %>%
  tabyl(region, think_covid)
gtregion <- gt(tregion)
dataregion <- gtregion$`_data` %>%
  arrange(factor(region, levels = c('Midlands', 'South & East', 'North', 'London', 'Wales, Scotland and Northern Ireland')))
### percentages
# Row-wise percentages: for each level of a characteristic, the share of
# respondents falling in each think_covid category.
#
# These tables are later bound to the count tables by row POSITION, so their
# row order has to match the count tables. The gender, degree and region count
# tables are re-ordered with arrange(factor(..., levels = ...)); the same
# level order is imposed here through factor(), otherwise table() would sort
# the rows alphabetically and the percentages would land on the wrong rows.
perGender <- with(data1, table(
  Gender = factor(Gender, levels = c("Male", "Female")),
  think_covid
))
pGender <- prop.table(perGender, 1) * 100

# Age, child, employment and key-worker counts keep their default order, so
# the default table() order is used for them as well.
perAge <- with(data1, table(Age, think_covid))
pAge <- prop.table(perAge, 1) * 100
perChild <- with(data1, table(Has_child, think_covid))
pChild <- prop.table(perChild, 1) * 100
perEmp <- with(data1, table(Working, think_covid))
pEmp <- prop.table(perEmp, 1) * 100
perKey <- with(data1, table(Key_worker, think_covid))
pKey <- prop.table(perKey, 1) * 100

perEdu <- with(data1, table(
  degree = factor(degree, levels = c(
    "GSCE/vocational/A-level/No formal qualifications",
    "Degree or higher (Bachelors, Masters, PhD)"
  )),
  think_covid
))
pEdu <- prop.table(perEdu, 1) * 100

perRegion <- with(data1, table(
  region = factor(region, levels = c(
    "Midlands",
    "South & East",
    "North",
    "London",
    "Wales, Scotland and Northern Ireland"
  )),
  think_covid
))
pRegion <- prop.table(perRegion, 1) * 100
### combining data
# Stack the count tables (one row per level) in display order: gender, age,
# child, employment, key worker, degree, region.
listfull <- list(datagender,
                 dataage,
                 datachild,
                 dataworking,
                 datakey,
                 datadegree,
                 dataregion)
# use.names = FALSE stacks by column position; the first column of each count
# table has a different name (Gender, Age, ...), and the result keeps the
# names of the first table.
fulltibble <- rbindlist(listfull, use.names = FALSE)
fulltibble <- as_tibble(fulltibble)
fulltibble1 <- fulltibble %>%
  rename(ycovid = "Have COVID-19",
         ncovid = "Have Not Had COVID-19")

# Stack the percentage tables in the same characteristic order and round to
# two decimals.
ptibble <- rbind(pGender, pAge, pChild, pEmp, pKey, pEdu, pRegion)
ptibble <- as_tibble(ptibble)
# data.frame() rewrites the column names into syntactic form
# ("Have COVID-19" -> "Have.COVID.19"), hence the dotted names in rename().
ptibble <- data.frame(lapply(ptibble, function(x) if (is.numeric(x)) round(x, 2) else x))
ptibble1 <- ptibble %>%
  rename(ypercent = "Have.COVID.19",
         npercent = "Have.Not.Had.COVID.19")

# cbind() pairs rows purely by position; stop if the two halves differ in
# length instead of recycling or mis-pairing rows.
stopifnot(nrow(fulltibble1) == nrow(ptibble1))
pfull <- cbind(fulltibble1, ptibble1)

# One "count (percent)" display string per outcome column.
pfull$havenocovid <- paste0(pfull$ncovid, " (", pfull$npercent, ")")
pfull$havecovid <- paste0(pfull$ycovid, " (", pfull$ypercent, ")")
pfull <- subset(pfull, select = c(Gender, havenocovid, havecovid)) %>%
  rename(Level = Gender)

# Characteristic name on the first row of each group, blank on the others.
pchar <- c("Gender", "", "Age", "", "", "", "", "Have a child", "", "Employment status", "",
           "Working in key sector", "", "Highest educational or professional qualification", "",
           "Region", "", "", "", "")
# The labels are hard-coded by position, so they must cover every row exactly.
stopifnot(length(pchar) == nrow(pfull))
pfull1 <- cbind(pchar, pfull)
# Final display table: characteristic, level, and one "count (percent)" column
# per outcome, with the two outcome columns grouped under a shared spanner and
# all headers set in bold.
bold_text <- cell_text(weight = "bold")
gtfull2 <- pfull1 %>%
  gt() %>%
  tab_spanner(label = "Had COVID-19", columns = 3:4) %>%
  cols_label(
    pchar = "Participant characteristics",
    havenocovid = "Think have not had COVID-19 n = 4656 (%)",
    havecovid = "Think have had COVID-19 n = 1493 (%)"
  ) %>%
  tab_style(
    style = bold_text,
    locations = cells_column_labels(columns = everything())
  ) %>%
  tab_style(
    style = bold_text,
    locations = cells_column_spanners()
  )