HackMD - Collaborative Markdown Knowledge Base

```r
# data.table provides rbindlist() and setDF(), used at the end of this snippet.
library(data.table)

# Small example data set: one response (y) and two factors (P, N).
# read.table() needs one record per line, so the row breaks matter.
tab_src <- "y P N
37.41667 petit facile
34.41667 petit dur
27.16667 petit impossible
34.08333 petit facile
29.83333 grand dur
24.25000 grand impossible
28.41667 grand facile
23.16667 grand dur
26.00000 grand impossible"

dat <- read.table(text = tab_src, header = TRUE, sep = " ")

# Split by one factor, then by the combination of both factors.
z <- split(dat, dat$P)
z <- split(dat, list(dat$P, dat$N), sep = "_")

# one data frame from the 6 elements ----
rbind(z[[1]], z[[2]], z[[3]], z[[4]], z[[5]], z[[6]])

# one data frame from the 6 elements, with a loop ----
# Shown for comparison only: the data frame is re-copied at every iteration;
# the do.call() / rbindlist() versions below are the ones to use.
out <- data.frame()
for (elt in z) {
  out <- rbind(out, elt)
}
out

# do.call() calls a function with its arguments supplied as a list:
# both calls below have the same form.
mean(x = rnorm(100), trim = 0, na.rm = TRUE)
args <- list(x = rnorm(100), trim = 0, na.rm = TRUE)
do.call(mean, args)

# the NICE base R approach ----
# rbind(z[[1]], z[[2]], z[[3]],
#       z[[4]], z[[5]], z[[6]])
w <- do.call(rbind, z)
row.names(w) <- NULL
w

# the really NICEST approach: data.table ----
w <- rbindlist(z, use.names = TRUE)
setDF(w)
```

### Apply

```r
aq <- airquality

# Column-wise mean (MARGIN = 2).
apply(aq, 2, mean, na.rm = TRUE)

# Column-wise mean and standard deviation; the result has one column per
# variable, so it is transposed for display.
z <- apply(aq, 2, function(x) {
  c(mean = mean(x, na.rm = TRUE), sd = sd(x, na.rm = TRUE))
})
t(z)

# Row-wise sum (MARGIN = 1).
z <- apply(aq, 1, function(x) {
  sum(x, na.rm = TRUE)
})
t(z)

# count the NAs by row or by column -----
is_na <- is.na(aq)

# so-so -----
apply(is_na, 1, sum)

# better than so-so -----
rowSums(is_na)
colSums(is_na)
```

### mapply

```r
# Summarise one variable: returns a one-row data.frame holding the variable
# name, its mean and its standard deviation (NAs removed).
ma_fun <- function(x, nom_variable) {
  data.frame(
    variable = nom_variable,
    mean = mean(x, na.rm = TRUE),
    sd = sd(x, na.rm = TRUE),
    stringsAsFactors = FALSE
  )
}

# with mapply
w <- mapply(
  ma_fun,
  x = mtcars,
  nom_variable = colnames(mtcars),
  SIMPLIFY = FALSE
)

# with for
# The result list is preallocated instead of being grown from an empty list.
w <- vector("list", length(mtcars))
for (j in seq_along(mtcars)) {
  x <- mtcars[[j]]
  nom_variable <- colnames(mtcars)[j]
  w[[j]] <- ma_fun(x = x, nom_variable = nom_variable)
}

# for all the solutions
w <- do.call(rbind, w)
row.names(w) <- NULL
w
```

### Exo regex

```r
library(data.table)
library(stringr)

tweets <- fread("data/coffeeTweets.csv")

# Keep the rows where screen_name contains "cafe" or "Coffee"
tweets[grepl("cafe|coffee", screen_name, ignore.case = TRUE), ]

# How many tweets have more than 8 hashtags and the hashtag "coffeeaddict"?
# Use str_count to count the hashtags (which are separated by spaces):
# more than 8 hashtags means more than 7 separating spaces.
tweets[grepl("coffeeaddict", hashtags, ignore.case = TRUE), ][
  str_count(hashtags, " ") > 7, ]

# Same question, answered from the tweet text by counting "#" characters.
tweets[grepl("#coffeeaddict", text, ignore.case = TRUE), ][
  str_count(text, "#") > 8, ]

# Among the names (screen_name), how many have at least three successive z?
tweets[grepl("[z]{3,}", screen_name, ignore.case = TRUE), ]

# Extract the trailing "digits:digits" part of created_at into a new column.
tweets[, hour := str_extract(created_at, "[[:digit:]]+:[[:digit:]]+$")]
```

```r
library(data.table)
library(stringi)

fruits <- c(
  "apple", "apricot", "avocado", "banana", "bell pepper", "bilberry",
  "blackberry", "blackcurrant", "blood orange", "blueberry", "boysenberry",
  "breadfruit", "canary melon", "cantaloupe", "cherimoya", "cherry",
  "chili pepper", "clementine", "cloudberry", "coconut", "cranberry",
  "cucumber", "currant", "damson", "date", "dragonfruit", "durian",
  "eggplant", "elderberry", "feijoa", "fig", "goji berry", "gooseberry",
  "grape", "grapefruit", "guava", "honeydew", "huckleberry", "jackfruit",
  "jambul", "jujube", "kiwi fruit", "kumquat", "lemon", "lime", "loquat",
  "lychee", "mandarine", "mango", "mulberry", "nectarine", "nut", "olive",
  "orange", "pamelo", "papaya", "passionfruit", "peach", "pear", "persimmon",
  "physalis", "pineapple", "plum", "pomegranate", "pomelo",
  "purple mangosteen", "quince", "raisin", "rambutan", "raspberry",
  "redcurrant", "rock melon", "salal berry", "satsuma", "star fruit",
  "strawberry", "tamarillo", "tangerine", "ugli fruit", "watermelon"
)
data_fruits <- data.table(nom = fruits, stringsAsFactors = FALSE)
rm(fruits)
data_fruits

# > For each element, compute whether or not the string "an" is detected.
# > Add the result in a new column.
data_fruits[, has_an := grepl("an", nom)]

# > Flag in a new column whether the fruit starts with the letter b.
data_fruits[, start_with_b := grepl("^b", nom)]

# Number of occurrences of "an" in each name.
data_fruits[, ct_an := stri_count(nom, regex = "an")]

# Start and end position of the first "an" (two columns created at once).
data_fruits[, c("pos_start_an", "pos_end_an") :=
  as.data.frame(stri_locate_first_regex(nom, "an"))]

# First three characters of each name.
data_fruits[, c("tri") := substr(nom, start = 1, stop = 3)]

# Several ways to fill "berry" for the names ending in "berry".
data_fruits[, c("berry") := stri_extract_first_regex(nom, "(.*)(berry)$")]
data_fruits[grepl("berry$", nom),
  c("berry") := gsub("(.*)(berry)$", "\\1\\2", nom)]
data_fruits[, c("berry") := stri_extract_all_regex(nom, "(.*)(berry)$")]

# Two ways to replace the names ending in "berry" by "chuck".
data_fruits[stri_detect(nom, regex = "berry$"), nom := "chuck"]
data_fruits[, nom := gsub("(.*)(berry)$", "chuck", nom)]
```

## LE CODE

```r
library(data.table)
library(magrittr)
library(stringi)
library(readxl)

# Read one results file and return it in long format.
#
# x: path of the file, passed to read_xls().
#
# Returns a data.table with the identifier columns plus two columns:
# "candidat" (the name of the melted column, kept as character) and "score"
# (its value). Every column that is neither an identifier nor an NB_* count
# is melted; the NB_* columns are in neither id.vars nor measure.vars, so
# they are not part of the result.
read_munic_dat <- function(x) {
  id_cols <- c(
    "ID_BVOTE", "SCRUTIN", "ANNEE", "TOUR", "DATE",
    "NUM_CIRC", "NUM_QUARTIER", "NUM_ARROND", "NUM_BUREAU"
  )
  nb_cols <- c(
    "NB_PROCU", "NB_INSCR", "NB_EMARG", "NB_VOTANT",
    "NB_BLANC", "NB_NUL", "NB_EXPRIM"
  )

  dat <- read_xls(x)
  setDT(dat)

  measure_vars <- setdiff(colnames(dat), c(id_cols, nb_cols))
  melt(
    dat,
    id.vars = id_cols,
    measure.vars = measure_vars,
    variable.name = "candidat",
    value.name = "score",
    variable.factor = FALSE
  )
}

# Read every file found (recursively) under a directory with
# read_munic_dat() and stack the results.
#
# repertoire: path of the directory to scan.
#
# Returns a data.frame (the stacked data.table is converted with setDF()).
# Stops with an error when the directory does not exist or contains no file.
read_municipales <- function(repertoire) {
  if (!dir.exists(repertoire)) {
    stop("le repertoire ", shQuote(repertoire), " n'existe pas.")
  }

  dat <- list.files(path = repertoire, recursive = TRUE, full.names = TRUE)
  if (length(dat) < 1) {
    stop("le repertoire ne contient pas de fichier a importer.")
  }

  dat <- data.table(
    nom_fichier = basename(dat),
    nom_repertoire = dirname(dat),
    nom_complet = dat
  )

  # First run of 8 digits and first "ardt_NN" token of the file name.
  # NOTE(review): these two columns are computed but not used below, so they
  # do not reach the returned table - confirm whether they should be attached
  # to each file's rows.
  dat[, date := stri_extract_first_regex(nom_fichier,
    pattern = "[[:digit:]]{8}")]
  dat[, ar := stri_extract_first_regex(nom_fichier,
    pattern = "ardt_[[:digit:]]{2}")]

  # fill = FALSE: the stacked tables must all have the same columns.
  x <- lapply(dat$nom_complet, read_munic_dat)
  x <- rbindlist(x, use.names = TRUE, fill = FALSE)
  setDF(x)
  x
}

# NOTE(review): the second call overwrites the result of the first one.
z <- read_municipales("data/leg-paris-2020")
z <- read_municipales("data/leg-paris-2021")
```

Syntax	Example	Reference
# Header	Header	基本排版
- Unordered List	Unordered List
1. Ordered List	Ordered List
- [ ] Todo List	Todo List
> Blockquote	Blockquote
**Bold font**	Bold font
*Italics font*	Italics font
~~Strikethrough~~	~~Strikethrough~~
19^th^	19ᵗʰ
H~2~O	H₂O
++Inserted text++	Inserted text
==Marked text==	Marked text
[link text](https:// "title")	Link
![image alt](https:// "title")	Image
`Code`	`Code`	在筆記中貼入程式碼
```javascript var i = 0; ```	`var i = 0;`
:smile:		Emoji list
{%youtube youtube_id %}	Externals
$L^aT_eX$	L^aT_eX
:::info This is an alert area. :::	This is an alert area.