<!-- Portfolio 2 -->
---
title: "Reading Experiment"
author: "Marie Thomsen"
date: '2022-10-24'
output: html_document
---
# Defining setup chunk and working directory
```{r setup, include=FALSE}
# Echo the code of every chunk in the knitted document
knitr::opts_chunk$set(echo = TRUE)
# Evaluate all later chunks from the project root rather than the .Rmd's folder.
# NOTE(review): absolute path is machine-specific (UCloud /work mount) — the
# document will not knit elsewhere without changing it. "portolio" appears to
# match the actual repository name; confirm before "fixing" the typo.
knitr::opts_knit$set(root.dir = '/work/CogSci_Methods01/portolio_assignment_02-marievthomsen')
```
# Loading packages
```{r}
library(tidyverse)   # dplyr/ggplot2/readr/purrr: wrangling, plotting, CSV I/O
library(gridExtra)   # arranging multiple grid-based plots
library(pastecs)     # stat.desc() descriptive/normality statistics
library(stringi)     # string utilities
library(WRS2)        # robust statistics, incl. yuen() trimmed-means test
```
# Preparing data
```{r}
# Loading the data: collect the logfile CSVs.
# NOTE(review): 'logfile_new.zip' is used here as a *directory* path — this only
# works if the archive has already been extracted to a folder of that name; confirm.
# Pattern is a regex, so anchor it: a bare "." matches any character, and
# un-anchored ".csv" would also match e.g. "old_csv_backup".
files <- list.files(path = 'logfile_new.zip', pattern = "\\.csv$", full.names = TRUE)
```
```{r}
# Anonymising the data: re-save each logfile under a random participant ID and
# delete the original (identifiable) file.
data_out <- list()
num_files <- length(files)
# One random ID per file; sample() on seq_len() permutes 1..num_files and,
# unlike seq(1, num_files, 1), is safe when there are zero files.
rand_ids <- sample(seq_len(num_files))
cnt_f <- 0
for (f in files) {
  cnt_f <- cnt_f + 1
  data_out[[f]] <- read_csv(file = f, col_names = TRUE)
  # New anonymous ID, e.g. "snew7" (paste0 replaces paste(c(...), collapse = ""))
  data_out[[f]]$ID <- paste0("snew", rand_ids[cnt_f])
  # Write the anonymised copy next to the originals
  out_name <- file.path(
    '/work/CogSci_Methods01/portolio_assignment_02-marievthomsen/logfile_new.zip',
    paste0("logfile_", data_out[[f]]$ID[1], ".csv")
  )
  write_csv(data_out[[f]], out_name, na = "NA")
  # Remove the original logfile so only the anonymised version remains
  file.remove(f)
}
```
```{r}
# Creating a dataframe: read every anonymised logfile into one tibble.
# BUG FIX: list.files(pattern = ) takes a regex, not a glob — the previous
# "*logfile_snew*" had an invalid leading "*" and effectively matched
# "logfile_sne"; a plain substring regex matches the intended files.
# NOTE(review): files were written into the logfile_new.zip folder above but are
# listed from getwd() here — confirm the working directory is what you expect.
files <- list.files(path = getwd(), pattern = "logfile_snew", full.names = TRUE)
data <- map_dfr(files, read_csv)
```
## Cleaning data
```{r}
# Rename the relevant columns in one vectorized assignment (by position, since
# the logger's original headers are not informative)
names(data)[c(1, 6, 7, 8)] <- c('column_1', 'reaction_time', 'condition', 'word_length')
# Strip the first comma and the first period from each stimulus word
data$Word <- sub(",", "", data$Word)
data$Word <- sub("\\.", "", data$Word)
# Recompute word length now that punctuation is gone
data$word_length <- nchar(data$Word)
# ID and condition are categorical variables
data$ID <- as.factor(data$ID)
data$condition <- as.factor(data$condition)
```
## Aggregating by test subject to detect irregularities
```{r}
# Every trial's reaction time against trial index, coloured per subject,
# to spot irregular participants at a glance
ggplot(data) +
  geom_point(aes(x = column_1, y = reaction_time, colour = ID))
```
```{r}
# Per-subject boxplots (outliers hidden, means shown as diamonds).
# coord_cartesian() zooms the view without dropping data from the statistics.
rt_box <- ggplot(data, aes(x = ID, y = reaction_time, fill = ID))
rt_box +
  geom_boxplot(outlier.shape = NA) +
  stat_summary(fun = mean, geom = "point", shape = 23) +
  coord_cartesian(ylim = c(0.1, 1.5))
```
# Correlation section
## Preparing Data
```{r}
# New data frame with the familiarisation items "socks" and "chopsticks" removed.
# (The previous `as.factor(data$Word)` call discarded its result — a no-op —
# and has been dropped.)
data_filter <- data %>%
  filter(Word != "socks") %>%
  filter(Word != "chopsticks")
# Candidate transformations of reaction time, plus the ordinal word number
# (column_1 is 0-indexed, so +1 gives the word's position in the text).
# The ordinal number is computed inside mutate() instead of via a temporary
# global variable.
data_filter <- data_filter %>%
  mutate(log_reaction_time = log(reaction_time),
         sqrt_reaction_time = sqrt(reaction_time),
         reaction_time_1 = (1/reaction_time),
         ordinal_number = column_1 + 1)
```
## Testing for normality in reaction time
```{r}
# Probability density histogram of reaction time with a fitted normal curve
rt <- data_filter$reaction_time
rt_mean <- mean(rt, na.rm = TRUE)
rt_sd <- sd(rt, na.rm = TRUE)
ggplot(data_filter, aes(x = reaction_time)) +
  geom_histogram(aes(y = ..density..), binwidth = 0.25) +
  stat_function(fun = dnorm,
                args = list(mean = rt_mean, sd = rt_sd),
                colour = "red", size = 1) +
  ggtitle("Probability Density of Reaction Time") +
  theme_classic() +
  xlim(range(rt))
# QQ-plot against theoretical normal quantiles
qqnorm(rt)
qqline(rt)
# Shapiro-Wilk test: a low p-value is evidence against normality
shapiro.test(rt)
```
## Transforming the data
```{r}
# Compare the candidate transformations on standardized skew, kurtosis, and the
# Shapiro-Wilk normality p-value (|*.2SE| > 1 indicates a problem)
data_transform <- data_filter %>%
  transmute(reaction_time,
            log_reaction_time = log(reaction_time),
            sqrt_reaction_time = sqrt(reaction_time),
            reaction_time_1 = (1/reaction_time))
round(stat.desc(data_transform, norm = TRUE), digits = 2)[c("skew.2SE", "kurt.2SE", "normtest.p"), ]
```
### Transformation using logarithm
```{r}
# Log-transformed reaction time: density histogram with fitted normal curve
log_rt <- data_filter$log_reaction_time
ggplot(data_filter, aes(x = log_reaction_time)) +
  geom_histogram(aes(y = ..density..),
                 binwidth = 0.25,
                 color = 'plum4', fill = 'lightblue2') +
  stat_function(fun = dnorm,
                args = list(mean = mean(log_rt, na.rm = TRUE),
                            sd = sd(log_rt, na.rm = TRUE)),
                colour = "red", size = 1)
# QQ-plot of the log-transformed values
ggplot(data_filter, aes(sample = log_reaction_time)) +
  stat_qq() +
  stat_qq_line() +
  labs(x = "Theoretical quantiles", y = "Sample quantiles")
```
### Transformation using squareroot
```{r}
# Square-root-transformed reaction time: density histogram with fitted normal
# curve, then QQ-plot.
# BUG FIX: the normal-curve parameters and the QQ-plot previously used the
# reciprocal transform (reaction_time_1); both now use sqrt_reaction_time so
# this section actually assesses the square-root transformation.
ggplot(data_filter, aes(x = sqrt_reaction_time)) +
  geom_histogram(aes(y = ..density..),
                 binwidth = 0.25,
                 color = 'plum4', fill = 'lightblue2') +
  stat_function(fun = dnorm,
                args = list(mean = mean(data_filter$sqrt_reaction_time, na.rm = TRUE),
                            sd = sd(data_filter$sqrt_reaction_time, na.rm = TRUE)),
                colour = "red", size = 1)
ggplot(data_filter, aes(sample = sqrt_reaction_time)) +
  stat_qq() +
  stat_qq_line() +
  labs(x = "Theoretical quantiles", y = "Sample quantiles")
```
### Transformation using reciprocity
```{r}
# Reciprocal-transformed reaction time (1/RT): density histogram with fitted
# normal curve, then QQ-plot.
# BUG FIX: the normal-curve parameters and the QQ-plot previously used the
# square-root transform (sqrt_reaction_time); both now use reaction_time_1 so
# this section actually assesses the reciprocal transformation.
ggplot(data_filter, aes(x = reaction_time_1)) +
  geom_histogram(aes(y = ..density..),
                 binwidth = 0.25,
                 color = 'plum4', fill = 'lightblue2') +
  stat_function(fun = dnorm,
                args = list(mean = mean(data_filter$reaction_time_1, na.rm = TRUE),
                            sd = sd(data_filter$reaction_time_1, na.rm = TRUE)),
                colour = "red", size = 1)
ggplot(data_filter, aes(sample = reaction_time_1)) +
  stat_qq() +
  stat_qq_line() +
  labs(x = "Theoretical quantiles", y = "Sample quantiles")
```
### Word length vs reaction time
```{r}
# Scatter plot of word length against reaction time, with a linear fit
# (points drawn first so the fitted line sits on top, as before)
ggplot(data_filter, aes(x = word_length, y = reaction_time)) +
  theme_minimal() +
  ggtitle('Word Length vs Reaction Time') +
  labs(x = 'Word Length', y = 'Reaction Time') +
  geom_point() +
  geom_smooth(method = lm, se = TRUE, colour = 'red')
```
#### Testing correlational assumption
```{r}
# Kendall's tau: rank-based correlation, appropriate here since reaction_time
# is non-normal (Shapiro-Wilk above) and word_length is discrete with many ties
cor.test(data_filter$word_length, data_filter$reaction_time, method = 'kendall')
```
### Word frequency vs reaction time
#### Preparing data
```{r}
# Loading the word-frequency table
wf <- read_csv("Word_frequency.csv")
# Strip punctuation from the words so they match data_filter$Word
# (first comma / first period only — same cleaning as applied to data$Word)
wf$Word <- sub(",", "", wf$Word)
wf$Word <- sub("\\.", "", wf$Word)
# Rename the second column of the word-frequency data frame
names(wf)[2] <- 'frequency'
# One frequency value per unique word
wf <- wf %>%
  select(Word, frequency) %>%
  distinct()
# Attach a frequency to every trial; explicit `by` documents the key and
# silences the implicit-join message (the join was already on Word)
data_filter <- data_filter %>%
  inner_join(wf, by = "Word")
# Mean reaction time per word, carrying along that word's (unique) frequency.
# first(frequency) replaces listing the bare column in summarise(), which
# relied on deprecated multi-row-summarise behaviour; result is identical.
mean_rt <- data_filter %>%
  group_by(Word) %>%
  summarise(mean_rt = mean(reaction_time), frequency = first(frequency)) %>%
  distinct()
```
#### Plotting mean reaction time in scatterplot
```{r}
# Scatter plot of word frequency against mean reaction time, with a linear fit
# (points drawn first so the fitted line sits on top, as before)
ggplot(mean_rt, aes(x = frequency, y = mean_rt)) +
  theme_minimal() +
  ggtitle('Word Frequency vs Reaction Time') +
  labs(x = 'Word Frequency', y = 'Reaction Time') +
  geom_point() +
  geom_smooth(method = lm, se = TRUE, colour = 'red')
```
#### Testing correlational assumption
```{r}
# Spearman's rho: rank-based correlation, robust to non-normal reaction times.
# as.numeric() ensures frequency is numeric — presumably it can be read as
# character from the CSV; verify against Word_frequency.csv
cor.test(data_filter$reaction_time, as.numeric(data_filter$frequency), method = 'spearman')
```
### Ordinal word number vs reaction time
```{r}
# Scatter plot of ordinal word number against reaction time, with a linear fit.
# BUG FIX: `data_tf` was never defined anywhere in this document — the intended
# data frame is data_filter. The y-axis label also said "Logged Reaction Time"
# while the raw reaction_time was plotted; label now matches the data.
ggplot(data_filter, aes(x = ordinal_number, y = reaction_time)) +
  geom_point() +
  geom_smooth(method = lm, se = TRUE, colour = 'red') +
  labs(x = 'Ordinal Word Number', y = 'Reaction Time') +
  ggtitle('Ordinal Word Number vs Reaction Time') +
  theme_minimal()
```
#### Testing correlational assumption
```{r}
# Spearman's rho between a word's position in the text and reaction time
# (rank-based, so robust to the non-normal reaction-time distribution)
cor.test(data_filter$ordinal_number, data_filter$reaction_time, method = 'spearman')
```
# Hypothesis Testing
Null-hypothesis H0: there is no difference in the mean response time between the experimental and control condition
Alternative hypothesis HA: there is a difference in the mean response time between the experimental and control condition
## Preparing the data
```{r}
# Trials of interest: the critical word (index 128) and the word immediately
# following it (index 129), split out per condition. %in% replaces the
# equivalent `column_1 == 128 | column_1 == 129` disjunction.
data_control <- filter(data, column_1 %in% c(128, 129), condition == "Control")
data_experimental <- filter(data, column_1 %in% c(128, 129), condition == "Experimental")
```
```{r}
WRS2::yuen(reaction_time ~ condition, data = data_control) # computed on sample.