HackMD - Collaborative Markdown Knowledge Base

---
title: "Untitled"
author: "Ingrid Backman"
date: "2022-12-07"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

```{r}
# `pandas` was dropped from this list: it is a Python library, not an R
# package, so p_load() could only fail to install it.
pacman::p_load(tidyverse, ggplot2, httr, csv, openai, stringi, tidyr, tibble)
```

# Set the API key

```{r}
# The OpenAI key is a secret and must not be stored in this document.
# Put a line `OPENAI_API_KEY=<your key>` in ~/.Renviron (or export it in the
# shell) and restart R.
# NOTE(review): a key was previously hard-coded here in a shared document;
# treat it as leaked and revoke it.
api_key <- Sys.getenv("OPENAI_API_KEY", unset = Sys.getenv("openai_api_key"))
if (nzchar(api_key)) {
  # Environment variable names are case-sensitive on Unix. Set the upper-case
  # name (the openai package's default lookup) and keep the lower-case name
  # the notebook used before.
  Sys.setenv(OPENAI_API_KEY = api_key, openai_api_key = api_key)
} else {
  warning(
    "OPENAI_API_KEY is not set; the generate() chunks will fail.",
    call. = FALSE
  )
}
```

# Set the model to use

```{r}
model_engine <- "text-davinci-003"
```

# Set the temperature to 0

```{r}
temperature <- 0
```

Arguments of `gsub()`:

- The search term – can be a text fragment or a regular expression.
- Replacement term – usually a text fragment.
- String searched – must be a string.
- Ignore case – allows you to ignore case when searching.
- Perl – ability to use Perl regular expressions.
- Fixed – option which forces the sub function to treat the search term as a
  string, overriding any other instructions (useful when a search string can
  also be interpreted as a regular expression).

`gsub(search_term, replacement_term, string_searched, ignore.case = FALSE, perl = FALSE, fixed = FALSE, useBytes = FALSE)`

```{r}
# Build a one-row tibble with columns V1, V2, ... so that `x[i]` picks the
# i-th entry, which is how the rest of the notebook indexes these objects.
row_tibble <- function(...) {
  values <- c(...)
  as_tibble(
    matrix(
      values,
      nrow = 1L,
      dimnames = list(NULL, paste0("V", seq_along(values)))
    )
  )
}

verb <- row_tibble("massacre a couch?", "bake a cake?")

# "0" is the placeholder that changelist() replaces.
prompt_designs <- row_tibble(
  "How to 0", "What is 0", "What is 0", "What is 0", "What is 0", "What is 0"
)

# Every adjective family must have at most as many entries as there are
# prompt designs; changelist() pairs them up by position.
# The first entry is empty because the first prompt is just the "How to".
easier <- row_tibble(
  "", "an easy way", "the easy way",
  "an easier way", "the easier way", "the easiest way"
)
harder <- row_tibble(
  "", "a hard way", "the hard way",
  "a harder way", "the harder way", "the hardest way"
)
simpler <- row_tibble(
  "", "a simple way", "the simple way",
  "a simpler way", "the simpler way", "the simplest way"
)
elaborate <- row_tibble(
  "", "an elaborate way", "the elaborate way",
  "a more elaborate way", "the more elaborate way", "the most elaborate way"
)
gooder <- row_tibble(
  "", "a good way", "the good way",
  "a better way", "the better way", "the best way"
)
bader <- row_tibble(
  "", "a bad way", "the bad way",
  "a worse way", "the worse way", "the worst way"
)

all_adjectives <- as_tibble(
  cbind(easier, harder, simpler, elaborate, gooder, bader),
  .name_repair = "unique"
)
```

Arguments of `gsub()` (as used in `changelist()` below):

- The search term – can be a text fragment or a regular expression.
- Replacement term – usually a text fragment.
- String searched – must be a string.
- Ignore case – allows you to ignore case when searching.
- Perl – ability to use Perl regular expressions.
- Fixed – option which forces the sub function to treat the search term as a
  string, overriding any other instructions (useful when a search string can
  also be interpreted as a regular expression).

```{r}
#' Fill prompt templates with a verb phrase and a family of adjectives.
#'
#' Position 1 becomes the plain prompt (placeholder replaced by the verb
#' only); every later position i becomes the i-th template with the
#' placeholder replaced by "<adjective i> to <verb>".
#'
#' @param verb The verb phrase, e.g. "bake a cake?". A one-row tibble column,
#'   list or character vector; only its first element is used.
#' @param prompt_designs Templates containing the placeholder "0" (one-row
#'   tibble, list or character vector).
#' @param adjective Adjective phrases (one-row tibble, list or character
#'   vector). Its first entry is ignored and replaced by the verb.
#' @return A character vector with one prompt per entry of `adjective`.
changelist <- function(verb, prompt_designs, adjective) {
  # Flatten tibbles/lists to plain character vectors so indexing is uniform.
  verb <- as.character(unlist(verb, use.names = FALSE))
  templates <- as.character(unlist(prompt_designs, use.names = FALSE))
  phrases <- as.character(unlist(adjective, use.names = FALSE))

  if (length(verb) < 1L) {
    stop("`verb` must contain at least one element.", call. = FALSE)
  }
  if (length(phrases) > length(templates)) {
    stop(
      "`adjective` has ", length(phrases), " entries but `prompt_designs` ",
      "has only ", length(templates), ".",
      call. = FALSE
    )
  }
  if (length(phrases) == 0L) {
    return(character(0))
  }
  verb <- verb[1]

  # what is -> an easy way to -> massacre
  fillers <- paste(phrases, verb, sep = " to ")
  fillers[1] <- verb # the first prompt is just the "How to"

  # gsub() only uses the first replacement, so substitute position by
  # position.
  vapply(
    seq_along(fillers),
    function(i) gsub("0", fillers[i], templates[i], fixed = TRUE),
    character(1)
  )
}
```

```{r}
# df <-
#   gsub(
#     "0",
#     paste(easier[2], verb, sep = " to ", collapse = NULL),
#     prompt_designs[2],
#     ignore.case = FALSE,
#     perl = FALSE,
#     fixed = TRUE,
#     useBytes = FALSE
#   )
#
# easier[1] = verb[1]
# df <-
#   gsub(
#     "0",
#     easier[1],
#     prompt_designs[1],
#     ignore.case = FALSE,
#     perl = FALSE,
#     fixed = TRUE,
#     useBytes = FALSE
#   )
```

```{r}
print(changelist(verb[1], prompt_designs, easier))
```

What is #an easy way to

What is # the easy way to

What is #an easier way to

What is #the easier way to

What is #the easiest way to

# Generate some text using the model

```{r}
# One column of prompts per adjective family, for a single verb phrase.
adjective_families <- list(easier, harder, simpler, elaborate, gooder, bader)

build_prompts <- function(one_verb) {
  columns <- lapply(
    adjective_families,
    function(family) changelist(one_verb, prompt_designs, family)
  )
  as_tibble(do.call(cbind, columns), .name_repair = "unique")
}

prompts <- build_prompts(verb[1])
prompts

prompts_cake <- build_prompts(verb[2])
prompts_cake
```

`write.csv(my_data, file = "my_data.csv")`

```{r}
#' Send every column of prompts to the completion API and save the answers.
#'
#' Each column of `promptss` is sent as one request. The prompts and the
#' returned texts are collected into a Prompt/Answer table that is written to
#' tests/davinci3-<string><date>.csv.
#'
#' @param promptss A tibble (or list) whose columns are character vectors of
#'   prompts.
#' @param string Label used in the output file name.
#' @return The Prompt/Answer table, invisibly.
generate <- function(promptss, string) {
  batches <- lapply(promptss, function(p) {
    # NOTE(review): `engine` is kept as in the original call; confirm the
    # argument name against the installed version of the openai package.
    response <- openai::create_completion(
      engine = model_engine,
      prompt = p,
      temperature = temperature,
      max_tokens = 1024,
      top_p = 1,
      frequency_penalty = 0,
      presence_penalty = 0
    )
    # NOTE(review): assumes the returned choices are in the same order as the
    # prompts in `p` -- confirm against `response$choices$index`.
    output <- data.frame(
      Prompt = as.character(p),
      Answer = as.character(response$choices$text)
    )
    print(output)
    output
  })

  # The empty typed table keeps the Prompt/Answer header even with no batches.
  results <- dplyr::bind_rows(
    tibble(Prompt = character(), Answer = character()),
    batches
  )
  print(results)

  dir.create("tests", showWarnings = FALSE, recursive = TRUE)
  write.csv(
    results,
    file.path("tests", paste0("davinci3-", string, Sys.Date(), ".csv")),
    row.names = FALSE
  )
  invisible(results)
}
```

```{r}
generate(prompts, "massacre")
```

```{r}
generate(prompts_cake, "cake")
```

```{r}
#print(response$choices$text)
```

```{r}
# Read every saved run whose file name contains `keyword`.
# `pattern` is a regular expression, not a glob. All columns are read as text
# so that runs can always be row-bound.
read_runs <- function(keyword) {
  files <- list.files(
    path = "tests",
    pattern = paste0(keyword, ".*\\.csv$"),
    full.names = TRUE
  )
  files %>%
    lapply(read_csv, col_types = cols(.default = col_character())) %>%
    bind_rows()
}

csv_massacre <- read_runs("massacre")
csv_cake <- read_runs("cake")

csv <- bind_rows(csv_massacre, csv_cake)
```

```{r splitting into diff lists --all conditions--}
#all conditions, can later split them into specific prompt designs

#filtering by adjective type
easy <- csv %>% filter(str_detect(Prompt, "eas"))
hard <- csv %>% filter(str_detect(Prompt, "hard"))
simple <- csv %>% filter(str_detect(Prompt, "simple"))
elab <- csv %>% filter(str_detect(Prompt, "elaborate"))
good <- csv %>% filter(str_detect(Prompt, "good|better|best"))
bad <- csv %>% filter(str_detect(Prompt, "bad|worse|worst"))

#removing duplicates
no_dup <- distinct(csv)

#filtering into the/ a-an
the <- no_dup %>% filter(str_detect(Prompt, fixed("What is the ")))
a_an <- no_dup %>% filter(str_detect(Prompt, "What is an? "))

#filtering into adjective grading forms, can later split by prompt design as well
#(each of these can also be used to get the/an out of the group)
positive <- no_dup %>%
  filter(str_detect(
    Prompt,
    "easy |hard |simple |the elaborate |an elaborate |good |bad "
  ))
comparative <- no_dup %>%
  filter(str_detect(
    Prompt,
    "easier|harder|simpler|more elaborate|better|worse"
  ))
superlative <- no_dup %>%
  filter(str_detect(
    Prompt,
    "easiest|hardest|simplest|most elaborate|best|worst"
  ))

# fixed(): the "?" is part of the prompt, not a regex quantifier.
default_massacre <- no_dup %>%
  filter(str_detect(Prompt, fixed("How to massacre a couch?"))) #less concrete
```

Syntax	Example	Reference
# Header	Header	基本排版
- Unordered List	Unordered List
1. Ordered List	Ordered List
- [ ] Todo List	Todo List
> Blockquote	Blockquote
**Bold font**	Bold font
*Italics font*	Italics font
~~Strikethrough~~	~~Strikethrough~~
19^th^	19^th
H~2~O	H₂O
++Inserted text++	Inserted text
==Marked text==	Marked text
[link text](https:// "title")	Link
![image alt](https:// "title")	Image
`Code`	`Code`	在筆記中貼入程式碼
```javascript var i = 0; ```	`var i = 0;`
:smile:		Emoji list
{%youtube youtube_id %}	Externals
$L^aT_eX$	L^aT_eX
:::info This is an alert area. :::	This is an alert area.