---
title: "Untitled"
author: "Ingrid Backman"
date: "2022-12-07"
output: html_document
---
```{r setup, include=FALSE}
# Document-wide chunk default: echo = TRUE shows each chunk's source code in
# the rendered output.
knitr::opts_chunk$set(echo = TRUE)
```
```{r}
pacman::p_load(tidyverse, ggplot2, httr, csv, openai, stringi, pandas, tidyr, tibble)
```
# Set the API key
```{r}
# Never hard-code an API secret in the document: anything committed here is
# exposed to everyone who can read the file, and a key that has been committed
# should be revoked and replaced.
# Keep the key outside the source instead, e.g. a line
#   OPENAI_API_KEY=<your key>
# in ~/.Renviron (restart R afterwards). The upper-case name matters:
# the openai package reads OPENAI_API_KEY, and environment variable names are
# case-sensitive on Linux/macOS.
if (!nzchar(Sys.getenv("OPENAI_API_KEY"))) {
  # A warning (not an error) so the chunks that only analyse saved CSV files
  # can still be run without a key.
  warning(
    "OPENAI_API_KEY is not set; calls to the OpenAI API will fail. ",
    "Add it to ~/.Renviron and restart R.",
    call. = FALSE
  )
}
```
# Set the model to use
```{r}
# Name of the completion model; generate() passes it to
# openai::create_completion() as `engine`.
# NOTE(review): OpenAI may have retired this model since the notebook was
# written - confirm it is still served before re-running the generation chunks.
model_engine <- "text-davinci-003"
```
# Set the temperature to 0
```{r}
# Sampling temperature that generate() passes to openai::create_completion().
# NOTE(review): 0 is presumably chosen to make completions as repeatable as
# possible - confirm.
temperature <- 0
```
Arguments of `gsub()`:

- **Search term** – can be a text fragment or a regular expression.
- **Replacement term** – usually a text fragment.
- **String searched** – must be a string.
- **Ignore case** – allows you to ignore case when searching.
- **Perl** – enables Perl-compatible regular expressions.
- **Fixed** – forces the function to treat the search term as a literal string, overriding any other instructions (useful when a search string could also be interpreted as a regular expression).

`gsub(search_term, replacement_term, string_searched, ignore.case = FALSE, perl = FALSE, fixed = FALSE, useBytes = FALSE)`

```{r}
# Building blocks for the prompts. Every object below is a one-row tibble with
# one character column per phrase (V1, V2, ...), so `x[i]` selects the i-th
# phrase as a one-column tibble.

# Build a one-row tibble from the given phrases, naming the columns V1, V2, ...
# The names are set explicitly: calling as_tibble() on an unnamed matrix (the
# former `as_tibble(cbind(...))` construction) relies on deprecated
# compatibility name repair and emits a lifecycle warning.
phrase_row <- function(...) {
  phrases <- c(...)
  names(phrases) <- paste0("V", seq_along(phrases))
  as_tibble(as.list(phrases))
}

# Verb phrases (the tasks being asked about).
verb <- phrase_row("massacre a couch?", "bake a cake?")

# Prompt templates; changelist() replaces the "0" placeholder.
# Position 1 is the plain "How to" baseline, positions 2-6 receive an
# adjective phrase.
# Known problem: when going through multiple iterations of changelist there
# are more adjectives than prompts.
prompt_designs <- phrase_row(
  "How to 0",
  "What is 0",
  "What is 0",
  "What is 0",
  "What is 0",
  "What is 0"
)

# Adjective phrases, one set per adjective family. The first entry of each set
# is empty because the first prompt is just the baseline "How to ...".
easier <- phrase_row(
  "",
  "an easy way",
  "the easy way",
  "an easier way",
  "the easier way",
  "the easiest way"
)
harder <- phrase_row(
  "",
  "a hard way",
  "the hard way",
  "a harder way",
  "the harder way",
  "the hardest way"
)
simpler <- phrase_row(
  "",
  "a simple way",
  "the simple way",
  "a simpler way",
  "the simpler way",
  "the simplest way"
)
elaborate <- phrase_row(
  "",
  "an elaborate way",
  "the elaborate way",
  "a more elaborate way",
  "the more elaborate way",
  "the most elaborate way"
)
gooder <- phrase_row(
  "",
  "a good way",
  "the good way",
  "a better way",
  "the better way",
  "the best way"
)
bader <- phrase_row(
  "",
  "a bad way",
  "the bad way",
  "a worse way",
  "the worse way",
  "the worst way"
)

# All adjective sets side by side in one row; the repeated V1..V6 names are
# made unique by the name repair.
all_adjectives <- as_tibble(
  cbind(easier, harder, simpler, elaborate, gooder, bader),
  .name_repair = "unique"
)
```
Arguments of `gsub()`, used below to fill the prompt templates:

- **Search term** – can be a text fragment or a regular expression.
- **Replacement term** – usually a text fragment.
- **String searched** – must be a string.
- **Ignore case** – allows you to ignore case when searching.
- **Perl** – enables Perl-compatible regular expressions.
- **Fixed** – forces the function to treat the search term as a literal string, overriding any other instructions (useful when a search string could also be interpreted as a regular expression).

```{r}
#' Fill the prompt templates for one verb phrase and one adjective set.
#'
#' The "0" placeholder of template i is replaced by "<adjective i> to <verb>".
#' Position 1 is the baseline: there the placeholder is replaced by the verb
#' phrase alone and the first adjective entry is ignored.
#'
#' @param verb The verb phrase: a string, or a one-column/one-row table holding
#'   it. Only the first value is used.
#' @param prompt_designs Templates containing the placeholder "0": a character
#'   vector or a one-row table with one template per column.
#' @param adjective Adjective phrases in the same layout as `prompt_designs`.
#' @return A character vector with one prompt per adjective entry (at least
#'   the baseline prompt).
changelist <- function(verb, prompt_designs, adjective) {
  # Accept plain character vectors as well as one-row tibbles/data frames.
  flatten <- function(x) as.character(unlist(x, use.names = FALSE))

  verb_text <- flatten(verb)
  stopifnot(length(verb_text) >= 1L)
  verb_text <- verb_text[1]

  designs <- flatten(prompt_designs)
  phrases <- flatten(adjective)

  # "<adjective> to <verb>" for every position; slot 1 is then overwritten
  # with the bare verb (this also yields the baseline for an empty set).
  fillers <- paste(phrases, verb_text, sep = " to ")
  fillers[1] <- verb_text

  if (length(designs) < length(fillers)) {
    stop(
      "`prompt_designs` has ", length(designs), " template(s) but ",
      length(fillers), " are needed for this adjective set.",
      call. = FALSE
    )
  }

  # fixed = TRUE: "0" is a literal placeholder, not a regular expression.
  vapply(
    seq_along(fillers),
    function(k) gsub("0", fillers[k], designs[k], fixed = TRUE),
    character(1)
  )
}
```
```{r}
# df <-
# gsub(
# "0",
# paste(easier[2], verb, sep = " to ", collapse = NULL),
# prompt_designs[2],
# ignore.case = FALSE,
# perl = FALSE,
# fixed = TRUE,
# useBytes = FALSE
# )
#
# easier[1] = verb[1]
# df<-
# gsub(
# "0",
# easier[1],
# prompt_designs[1],
# ignore.case = FALSE,
# perl = FALSE,
# fixed = TRUE,
# useBytes = FALSE
# )
```
```{r}
# Sanity check: print the prompts built for the first verb phrase with the
# "easier" adjective set.
print(changelist(verb[1], prompt_designs, easier))
```
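Structure of the adjective prompts: the template "What is", then the adjective phrase, then "to" followed by the verb phrase.

- What is **an easy way** to …
- What is **the easy way** to …
- What is **an easier way** to …
- What is **the easier way** to …
- What is **the easiest way** to …
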
# Generate some text using the model
```{r}
# Prompt tables: one column per adjective family (easier, harder, simpler,
# elaborate, gooder, bader), one row per prompt design.

# Build the prompt table for a single verb phrase by running changelist() over
# every adjective set and binding the results column-wise.
prompts_for_verb <- function(one_verb) {
  adjective_sets <- list(easier, harder, simpler, elaborate, gooder, bader)
  prompt_columns <- lapply(
    adjective_sets,
    function(adjective_set) changelist(one_verb, prompt_designs, adjective_set)
  )
  as_tibble(do.call(cbind, prompt_columns), .name_repair = "unique")
}

# Couch prompts (first verb phrase).
prompts <- prompts_for_verb(verb[1])
prompts

# Cake prompts (second verb phrase).
prompts_cake <- prompts_for_verb(verb[2])
prompts_cake
```
Reference: `write.csv(my_data, file = "my_data.csv")` writes a data frame to a CSV file.
```{r}
#' Request completions for every batch of prompts and save them to a CSV file.
#'
#' Uses the globals `model_engine` and `temperature` for the request settings.
#'
#' @param promptss A table/list whose elements (columns) are character vectors
#'   of prompts; each element is sent to the API as one request.
#' @param string Label inserted into the output file name.
#' @return The accumulated Prompt/Answer table, invisibly. As a side effect the
#'   table is written to "tests/davinci3-<string><date>.csv".
generate <- function(promptss, string) {
  # Make sure the output directory exists *before* any API request is made,
  # so the answers are not lost to a failing write.csv() at the very end.
  out_dir <- "tests"
  dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)
  out_file <- file.path(
    out_dir,
    paste0("davinci3-", string, Sys.Date(), ".csv")
  )

  # tibble() replaces the deprecated data_frame().
  results <- tibble::tibble(Prompt = character(), Answer = character())

  for (p in promptss) {
    response <- openai::create_completion(
      engine = model_engine,
      prompt = p,
      temperature = temperature,
      max_tokens = 1024,
      top_p = 1,
      frequency_penalty = 0,
      presence_penalty = 0
    )
    output <- data.frame(
      Prompt = as.character(p),
      Answer = as.character(response$choices$text)
    )
    # Progress output: the latest batch, then everything collected so far.
    print(output)
    results <- rbind(results, output)
    print(results)
  }

  write.csv(results, out_file, row.names = FALSE)
  invisible(results)
}
```
```{r}
# Request completions for the couch prompts; generate() writes them to a CSV
# file whose name contains "massacre".
generate(prompts, "massacre")
```
```{r}
# Request completions for the cake prompts; generate() writes them to a CSV
# file whose name contains "cake".
generate(prompts_cake, "cake")
```
```{r}
#print(response$choices$text)
```
```{r}
# Read every CSV file in `dir` whose name contains `keyword` and stack the
# rows into one table.
# `pattern` in list.files() is a regular expression, not a shell glob, so the
# match is spelled out as "<keyword> ... .csv" instead of "*keyword*".
read_results <- function(keyword, dir = "tests/") {
  list.files(
    path = dir,
    pattern = paste0(keyword, ".*\\.csv$"),
    full.names = TRUE
  ) %>%
    map(read_csv) %>%
    bind_rows()
}

csv_massacre <- read_results("massacre")
csv_cake <- read_results("cake")

# All saved answers for both verb phrases in one table.
csv <- bind_rows(csv_massacre, csv_cake)
```
```{r splitting into diff lists --all conditions--}
# All conditions; these can later be split into specific prompt designs.

# Filter by adjective family (taken from `csv`, i.e. before de-duplication).
easy <- csv %>%
  filter(str_detect(Prompt, "eas"))
hard <- csv %>%
  filter(str_detect(Prompt, "hard"))
simple <- csv %>%
  filter(str_detect(Prompt, "simple"))
elab <- csv %>%
  filter(str_detect(Prompt, "elaborate"))
good <- csv %>%
  filter(str_detect(Prompt, "good|better|best"))
bad <- csv %>%
  filter(str_detect(Prompt, "bad|worse|worst"))

# Remove fully duplicated rows (keeps the first occurrence of each row).
no_dup <- distinct(csv)

# Filter by article: definite ("the") vs. indefinite ("a"/"an").
the <- no_dup %>%
  filter(str_detect(Prompt, "the"))
a_an <- no_dup %>%
  filter(str_detect(Prompt, "What is a|What is an"))

# Filter by adjective grading form; these can later be split by prompt design
# (and by article) as well.
positive <- no_dup %>%
  filter(str_detect(Prompt, "easy |hard |simple |the elaborate |an elaborate |good |bad "))
comparative <- no_dup %>%
  filter(str_detect(Prompt, "easier|harder|simpler|more elaborate|better|worse"))
superlative <- no_dup %>%
  filter(str_detect(Prompt, "easiest|hardest|simplest|most elaborate|best|worst"))

# Baseline prompt without any adjective (less concrete).
# fixed(): match the text literally - as a regular expression the trailing "?"
# would make the "h" of "couch" optional instead of matching a question mark.
default_massacre <- no_dup %>%
  filter(str_detect(Prompt, fixed("How to massacre a couch?")))
```