---
tags: Tartu_Texts101_R
---
# Jagatud märkmed - (HVEE.01.006)
Püsilink: https://hackmd.io/@OGZFb2mRSA65ybyn4hSfkQ/BJFwvc4li/edit

Pildi jagamiseks tee pildil (all paremal nurgas) paremklõps -> copy image.
Seejärel vajuta siin tekstikastis CTRL+V (CMD+V). HackMD laeb faili üles ja kuvab seejärel lingi Markdowni formaadis. Joonise järele saab kirjutada oma nime.

Joonis 1. Näitepilt
## 03.04.2023
```
# Rows of `edetabel` whose language is "en" and whose year lies strictly
# between 1989 and 2000.
edetabel %>%
  filter(
    language == "en",
    year > 1989,
    year < 2000
  )
```
Proovi ka teada saada, mitu lugu oli mis keeles 1994. aastal ja 2014. aastal.
```
# Number of rows per (year, language), restricted to the years 1994 and 2014.
edetabel %>%
  filter(year %in% c(1994, 2014)) %>%
  count(year, language)
```
## 10.04.2023
```
# Per (artist, song): count the words flagged by `onstopsona`, compute each
# word's share of the flagged words within that song, sort by that share and
# keep only the rows for "Winny Puhh".
laulusonad %>%
  left_join(stopsonad2, by = "word") %>%
  group_by(artist, song) %>%
  filter(onstopsona == TRUE) %>%
  count(word) %>%
  # count() keeps the (artist, song) grouping, so sum(n) is a per-song total.
  mutate(proportsioon = n / sum(n)) %>%
  arrange(desc(proportsioon)) %>%
  filter(artist == "Winny Puhh")

# Same artist, all songs pooled: flagged words sorted by frequency.
laulusonad %>%
  filter(artist == "Winny Puhh") %>%
  left_join(stopsonad2, by = "word") %>%
  filter(onstopsona == TRUE) %>%
  count(word, sort = TRUE)

# Variant using inner_join() against `stopsonad`: only words present in that
# table survive the join, so no separate flag filter is needed.
laulusonad %>%
  filter(artist == "Winny Puhh") %>%
  inner_join(stopsonad, by = "word") %>%
  count(word, sort = TRUE)
```
```
# Keep the rows where `onstopsona` is missing after the left join, i.e. words
# that found no match in `stopsonad2`. is.na() is the correct test for NA.
laulusonad %>%
left_join(stopsonad2,by="word") %>%
filter(is.na(onstopsona))
# NOTE(review): `onstopsona==NA` evaluates to NA for every row and filter()
# drops rows whose condition is NA, so this pipeline always returns zero rows.
# Presumably kept on purpose as a contrast to the is.na() version above - confirm.
laulusonad %>%
left_join(stopsonad2,by="word") %>%
filter(onstopsona==NA)
```
## 17.04.2023

Näidisgraafik
## 08.05.2023
```
# First 10 rows of `peatykid_sonad2` for chapters below 11, after dropping
# stopwords and any word containing an uppercase letter.
peatykid_sonad2 %>%
  filter(chapter < 11) %>%
  anti_join(stopwords, by = "word") %>%
  filter(!str_detect(word, "[A-ZÕÄÖÜ]")) %>%
  # Drop any grouping so row_number() numbers the whole table, not each group.
  ungroup() %>%
  mutate(row_number = row_number()) %>%
  filter(row_number < 11)

# Ten most frequent words of `raamat1_sonad2` under the same filters;
# count(sort = TRUE) orders by frequency, so row_number is the frequency rank.
raamat1_sonad2 %>%
  filter(chapter < 11) %>%
  anti_join(stopwords, by = "word") %>%
  filter(!str_detect(word, "[A-ZÕÄÖÜ]")) %>%
  count(word, sort = TRUE) %>%
  mutate(row_number = row_number()) %>%
  filter(row_number < 11)
```
## 15.05.2023
```
# Raise vroom's connection buffer size (read through an environment variable)
# so that very long lines in the corpus files can be read.
Sys.setenv(VROOM_CONNECTION_SIZE = "500000000")

# Take at most the first 29 files. head() is used instead of `[1:29]` because
# indexing past the end of the vector would yield NA paths and make
# read_lines() fail when the directory holds fewer than 29 files.
filelist <- head(
  list.files("data/uiboaed_ilukirjandus/soned", full.names = TRUE),
  29
)

# One row per line of text, tagged with the bare file name (directory removed).
texts <- map_df(
  filelist,
  ~ tibble(txt = read_lines(.x)) %>%
    mutate(filename = basename(.x))
)
```
```
# Horizontal bar chart of the ten most frequent words across all of `words`.
words %>%
  count(word, sort = TRUE) %>%
  mutate(rank = row_number()) %>%
  filter(rank < 11) %>%
  ggplot(aes(y = word, x = n)) +
  geom_col()

# Word counts within each file, sorted by descending count.
words %>%
  group_by(filename) %>%
  count(word, sort = TRUE)
```
```
# Ten most frequent words in a single file.
words %>%
  filter(filename == "Andres_Saal_Vambola.utf8") %>%
  count(word, sort = TRUE) %>%
  mutate(rank = row_number()) %>%
  filter(rank < 11)

# Ten most frequent words in every file: count() keeps the filename grouping,
# so row_number() restarts at 1 within each file.
words %>%
  group_by(filename) %>%
  count(word, sort = TRUE) %>%
  mutate(rank = row_number()) %>%
  filter(rank < 11)
```

```
# Relative frequency of the word "töö" in each file, drawn as one point per file.
words %>%
  group_by(filename) %>%
  count(word, sort = TRUE) %>%
  # The filename grouping is still active, so sum(n) is the per-file word total.
  mutate(freq = n / sum(n)) %>%
  filter(word == "töö") %>%
  # filter(row_number() < 10) %>% # sort already put them in the right order
  ggplot(aes(y = filename, x = freq, color = filename)) +
  geom_point() +
  # The colour legend would only repeat the y axis; "none" replaces the
  # deprecated `guides(color = FALSE)` form.
  guides(color = "none")
```
```
# The 100 (word, file) pairs with the highest tf-idf score.
keywords <- texts %>%
  # Keep the original case so that words containing capitals can be dropped below.
  unnest_tokens(word, txt, to_lower = FALSE) %>%
  # Replace every lowercase "w" with "v" before counting.
  mutate(word = str_replace_all(word, "w", "v")) %>%
  # anti_join(stopwords, "word") %>%
  group_by(filename) %>%
  filter(!str_detect(word, "[A-ZÕÄÖÜ]")) %>%
  count(word, sort = TRUE) %>%
  bind_tf_idf(word, filename, n) %>%
  arrange(desc(tf_idf)) %>%
  head(100)
```