---
tags: workshops
---
# Notes from the workshop 23.08.2021 Tallinn
## Introduction to R and Tidyverse (Peeter Tinits)
## Materials
- Code&data: https://github.com/peeter-t2/IntroR_DIGMET2021
- Self-link: http://tiny.cc/introR_digmet2021_notes
- Slides: http://tiny.cc/introR_digmet2021_slides
### some notes:
1 != 1      # FALSE: a value is never unequal to itself
1 != 2      # TRUE
!!(1 == 1)  # TRUE: negating twice gives back the original result
!(1 == 2)   # TRUE: negation of a false comparison
> 10 %in% 1:10
[1] TRUE
> 5 %in% 1:10
[1] TRUE
> 15 %in% 1:10
[1] FALSE
### Working with data
gapminder %>%
  filter(country == "Germany")  # all rows for Germany
# 1. Poland
gapminder %>%
  filter(country == "Poland")
# 2.
gapminder %>%
  filter(country == "Poland", year == 1972)  # both conditions must hold
# 3.
gapminder %>%
  filter(continent == "Europe")
# 4.
gapminder %>%
  filter(
    continent == "Europe",
    lifeExp > 75,
    year > 1970,  # strictly between 1970 and 1980
    year < 1980
  )
Great!
gapminder %>%
  filter(gdpPercap > 30000)  # rows with gdpPercap above 30000

gapminder %>%
  filter(country %in% c("Finland", "Germany", "Kuwait")) %>%
  ggplot(aes(x = year, y = gdpPercap, color = country)) +
  geom_point()  # one point per row, coloured by country
---
# 1. The country and year with the highest gdp per capita.
gapminder %>%
  arrange(desc(gdpPercap))  # largest gdpPercap ends up in the first row
# 2. The trends in gdpPercap and life expectancy in that country. When was it lowest?
gapminder %>%
  filter(country == "Kuwait") %>%
  arrange(gdpPercap, lifeExp)  # lowest gdpPercap first, ties broken by lifeExp

gapminder %>%
  filter(country == "Kuwait") %>%
  arrange(lifeExp)  # lowest life expectancy first
# 3. The top 10 countries in all time life expectancy.
gapminder %>%
  arrange(desc(lifeExp)) %>%  # highest life expectancy observations first
  distinct(country) %>%       # keep each country once, in order of first appearance
  head(10)                    # the task asks for ten countries, not all of them
# 4. An overview of the population of India
gapminder %>%
  filter(country == "India") %>%
  select(year, pop)  # keep only the year and population columns
seethis <- gapminder %>%
  group_by(country) %>%
  mutate(sequencecountry = row_number())  # counter restarts at 1 for every country

seethis %>%
  filter(sequencecountry == 2)  # the second row of each country
gapminder %>%
  group_by(country) %>%
  mutate(meanlife = mean(lifeExp)) %>%  # the group mean is repeated on every row
  select(country, meanlife) %>%
  unique()                              # collapse the repeats to one row per country

gapminder %>%
  group_by(year) %>%
  mutate(meanlife = mean(lifeExp)) %>%
  select(year, meanlife) %>%
  unique()                              # one row per year

gapminder %>%
  group_by(country) %>%
  mutate(maxlife = max(lifeExp)) %>%
  filter(lifeExp == maxlife)            # keep the row(s) where each country peaks

gapminder %>%
  group_by(country) %>%
  summarise(pop = max(pop))             # summarise() returns one row per group

gapminder %>%
  group_by(country) %>%
  summarise(lifeExp = max(lifeExp))

gapminder %>%
  group_by(year) %>%
  summarise(lifeExp = mean(lifeExp))
# 1. Make a new variable row number.
gapminder %>%
  mutate(rownumber = seq_len(n()))  # explicit 1..n sequence; n() is the current row count

gapminder %>%
  mutate(rownumber = row_number())  # same result using the dplyr helper

gapminder %>%
  group_by(country) %>%
  mutate(rownumber = row_number())  # numbering restarts within each country
# 2. Pick a year, group the dataset by continent, sort it by life expectancy, and create a new variable: the relative position of a country within the continent.
gapminder %>%
  filter(year == 2002) %>%
  group_by(continent) %>%
  arrange(lifeExp) %>%              # ascending: lowest life expectancy first
  mutate(rownumber = row_number())  # position counted within each continent

gapminder %>%
  filter(year == 2002) %>%
  group_by(continent) %>%
  arrange(lifeExp) %>%
  mutate(rownumber = row_number()) %>%
  View()                            # same table, opened in the data viewer
# 3. Find the top 3 countries within each continent.
ranked <- gapminder %>%
  filter(year == 2002) %>%
  group_by(continent) %>%
  arrange(desc(lifeExp)) %>%        # highest life expectancy first, so rank 1 is the top country
  mutate(rownumber = row_number())  # rank counted within each continent

ranked %>%
  filter(rownumber < 4)

ranked %>%
  filter(rownumber %in% 1:3)        # equivalent way to keep ranks 1 to 3
# 4. Find the maximal gdp per continent.
gapminder %>%
  group_by(continent) %>%
  summarise(maxgdpPercap = max(gdpPercap))  # maximum of the per-capita value, for comparison

gapminder %>%
  group_by(continent) %>%
  mutate(gdp = gdpPercap * pop) %>%         # gdpPercap multiplied by pop
  summarise(maxgdp = max(gdp))

gapminder %>%
  group_by(continent) %>%
  summarise(maxgdp = max(gdpPercap * pop))  # same result without the helper column
### Bugfix
The variable must be called `cell_phones_perpop`, not `perpop`:
# Let's plot this to have a nice overview
cellphonepop %>%
  mutate(cell_phones_perpop = cell_phones_total / population_total) %>%
  rename(country = geo) %>%  # rename geo to country so it can serve as the join key
  inner_join(
    simple_meta %>% filter(world_4region == "europe"),
    by = "country"
  ) %>%                      # keep only countries found in the "europe" metadata rows
  ggplot(aes(x = time, y = cell_phones_perpop)) +
  geom_point()

cellphonepop %>%
  mutate(cell_phones_perpop = cell_phones_total / population_total) %>%
  rename(country = geo) %>%
  anti_join(
    simple_meta %>% filter(world_4region == "europe"),
    by = "country"
  ) %>%                      # keep only countries NOT found in the "europe" metadata rows
  ggplot(aes(x = time, y = cell_phones_perpop)) +
  geom_point()
## combining tables
Look at this table while in edit mode.


youthliteracy <- read_csv(
  "data/ddf--datapoints--literacy_rate_youth_total_percent_of_people_ages_15_24--by--geo--time.csv"
)
lifeexpectancy <- read_csv(
  "data/ddf--datapoints--life_expectancy_years--by--geo--time.csv"
)

# Rows present in both tables, matched on geo and time.
common <- youthliteracy %>%
  inner_join(lifeexpectancy, by = c("geo", "time"))

# Further restricted to rows that also have a match in cellphonepop.
common3 <- common %>%
  inner_join(cellphonepop, by = c("geo", "time"))
common3 %>%
  ggplot(aes(
    x = literacy_rate_youth_total_percent_of_people_ages_15_24,
    y = life_expectancy_years
  )) +
  geom_point()  # all rows of the joined table

common3 %>%
  filter(time == 2000) %>%  # restrict the same plot to rows where time is 2000
  ggplot(aes(
    x = literacy_rate_youth_total_percent_of_people_ages_15_24,
    y = life_expectancy_years
  )) +
  geom_point()
### Share a graph here.




## final thing
final <- cellphonepop %>%
  mutate(cell_phones_perpop = cell_phones_total / population_total) %>%
  rename(country = geo) %>%               # rename geo to country so it can serve as the join key
  inner_join(simple_meta, by = "country")

final %>%
  filter(world_4region == "asia", time > 1990) %>%
  select(country, cell_phones_perpop, time) %>%
  ggplot(aes(x = time, y = cell_phones_perpop, color = country)) +
  geom_point()                            # one point per row, coloured by country