R
R code used to generate outcome demonstrated during knowledge hour presentation at GLC 21-04-2023
# Input and output directories
dir.C.drive <-"C:"
dir.R.packages <- file.path(dir.C.drive,"R","R-4.1.3")
dir.main <- file.path(dir.C.drive,"Google-Drive")
dir.GLC <- file.path(dir.main,"Greenlight-clinical")
dir.GLC.presentation <- file.path(dir.GLC,"Knowledge-hours-presentation")
dir.GLC.data <- file.path(dir.GLC.presentation, "data-download")
dir.GLC.P.drive <- file.path(dir.GLC,"P-drive")
dir.eTMF <- file.path(dir.GLC.presentation,"eTMF")
library(jpeg, lib.loc = dir.R.packages)
library(withr,lib.loc = dir.R.packages)
library(dplyr, lib.loc = dir.R.packages)
library(tidyr,lib.loc = dir.R.packages)
library(purrr, lib.loc = dir.R.packages)
library(foreach, lib.loc = dir.R.packages)
library(iterators, lib.loc = dir.R.packages)
library(doParallel, lib.loc = dir.R.packages)
library(qrcode, lib.loc = dir.R.packages)
library(backports, lib.loc = dir.R.packages)
library(tzdb, lib.loc = dir.R.packages)
library(ggplot2,lib.loc = dir.R.packages)
library(crayon,lib.loc = dir.R.packages)
library(tidyverse, lib.loc = dir.R.packages)
library(farver, lib.loc = dir.R.packages)
library(svglite,lib.loc = dir.R.packages)
library(grid,lib.loc = dir.R.packages)
library(labeling,lib.loc = dir.R.packages)
library(magrittr,lib.loc = dir.R.packages)
library(imager,lib.loc = dir.R.packages)
library(patchwork,lib.loc = dir.R.packages)
library(magick,lib.loc = dir.R.packages)
library(forcats,lib.loc = dir.R.packages)
library(cowplot,lib.loc = dir.R.packages)
library(fs,lib.loc = dir.R.packages)
library(data.tree,lib.loc = dir.R.packages)
library(sketcher,lib.loc = dir.R.packages)
library(rJava,lib.loc = dir.R.packages)
library(xlsx,lib.loc = dir.R.packages)
library(readxl,lib.loc = dir.R.packages)
#-----------------------
# List Excel sheets
#-----------------------
list.files(path = dir.GLC.data, pattern="*.xlsx", full.names = TRUE)
# There are 115 sheets. All sheets matched source data file
sheets.C200 <- readxl::excel_sheets(path=file.path(dir.GLC.data,"C-200-001_sdtm - Copy.xlsx"))
#length(sheets.C200) #115
# Some sheet names not seen in the source data file
sheets.invetus <- readxl::excel_sheets(path=file.path(dir.GLC.data,"AET P 20286 Collated Data - Copy.xlsx"))
#length(sheets.invetus) 101
#------------------------
# Write sheets into Excel
#------------------------
# Create a new Excel file with same sheets from "C-200-001_sdtm - Copy.xlsx"
note <- data.frame(note="Client please use existing sheet names")
new.excel.file.path <- file.path(dir.GLC.data,"C-300-001_sdtm.xlsx")
# Write data into first sheet
xlsx::write.xlsx(x=note
,file=new.excel.file.path
,sheetName=sheets.C200[1]
,append=FALSE)
# Add second to last sheets to the same Excel
lapply(X=2:length(sheets.C200)
,FUN=function(i) xlsx::write.xlsx(note
,file=new.excel.file.path
,sheetName=sheets.C200[[i]]
,append=TRUE))
#------------------------------------------
# Create Gantt chart for employment history
#------------------------------------------
# Input, output directory
task.1.input.folder.path <- file.path(dir.GLC.presentation,"images-download") # dir.exists(task.1.input.folder.path) [1] TRUE
task.1.output.folder.path <- file.path(dir.GLC.presentation,"gantt-charts") # dir.exists(task.1.output.folder.path) [1] TRUE
# Set end date of current employment to today
date.today <- as.character(Sys.Date())
# Create employment data
# Reshape it to long format which start dates and end dates are replaced by two new variables- variable, value
df <- as.data.frame(rbind(
c("University of Queensland\n PhD student", '2016-04-05', '2020-05-11',"past")
,c("QIMR Berghofer Medical Research Institute\n R programmer", '2020-04-27', '2021-11-09',"past")
,c("Greenlight Clinical\n Statistical programmer", '2021-09-27', date.today,"current"))) %>%
dplyr::select("task" = 1, "start" = 2, "end" = 3, "status"=4) %>%
dplyr::mutate(task = forcats::fct_rev(factor(task, levels = (task))),
start = as.Date(start),
end = as.Date(end)) %>%
dplyr::mutate(lab_pos = start) %>%
tidyr::pivot_longer(2:3, names_to = "variable", values_to = "value") # dim(df) [1] 6 4
# Change size of plotting window to 10:5 width to height
dev.new(width=10, height=5,noRStudioGD = TRUE)
# Plot employment as a gantt chart with Y axis showing every year
## [Changing line colors with ggplot()](https://stackoverflow.com/questions/5171263/changing-line-colors-with-ggplot)
gantt.chart <- ggplot2::ggplot(df, aes(x=value, y=task, color=status)) +
ggplot2::geom_line(linewidth = 20, show.legend = FALSE) +
# To modify color, use ggplot2::scale_color_manual(values=). values matched to color variable values current, past
ggplot2::scale_color_manual(values=c("red","grey50"))+
ggplot2::scale_x_date( labels = function(x) lubridate::year(x)
,date_breaks = '1 year'
,date_minor_breaks = "1 year"
,position = "top") +
ggplot2::labs(x = NULL,y = NULL) +
ggplot2::theme( axis.text.x=element_text(size=10)
,axis.text.y=element_text(size=12))
# Save employment gantt chart as a png file
## plot size needs to match those in dev.new()
ggplot2::ggsave( filename = file.path( task.1.output.folder.path,"employment-history.png")
,width = 10
,height = 5
,plot= print(gantt.chart) #
,device = "png")
# Read logo image files
logo.bash <- magick::image_read(file.path(task.1.input.folder.path, "BASH_full_colored_dark.jpg")) # class(logo.bash) [1] "magick-image"
logo.R <- magick::image_read(file.path(task.1.input.folder.path,"R-logo-Cluster-Analysis-with-R-Edureka.png"))
logo.SAS <- magick::image_read(file.path(task.1.input.folder.path,"SAS-logo.png"))
# Set base x y positions of the plot where logo images are added to
base.position.x.group.1 <- 0.475
base.position.y.group.1 <- 0.975
base.position.x.group.2 <- base.position.x.group.1+0.325
base.position.y.group.2 <- base.position.y.group.1-0.325
base.position.x.group.3 <- base.position.x.group.2+0.15
base.position.y.group.3 <- base.position.y.group.2-0.25
# Introduction to cowplot
# https://cran.r-project.org/web/packages/cowplot/vignettes/introduction.html
gantt.chart.final <- cowplot::ggdraw(gantt.chart) +
# Add logo images to employment 1 bar
cowplot::draw_image(logo.bash
,x = base.position.x.group.1
,y = base.position.y.group.1
,hjust = 1, vjust = 1, width = 0.1, height = 0.2) +
cowplot::draw_image(logo.R
,x = base.position.x.group.1 + 0.1
,y = base.position.y.group.1 - 0.04
,hjust = 1, vjust = 1, width = 0.1, height = 0.1) +
cowplot::draw_image(logo.SAS
,x = base.position.x.group.1+0.2
,y = base.position.y.group.1
,hjust = 1, vjust = 1, width = 0.1, height = 0.2)+
# Add logo images to employment 2 bar
cowplot::draw_image(logo.R
,x = base.position.x.group.2
,y = base.position.y.group.2
,hjust = 1, vjust = 1, width = 0.1, height = 0.1)+
cowplot::draw_image(logo.bash
,x = base.position.x.group.2+ 0.1
,y = base.position.y.group.2+0.05
,hjust = 1, vjust = 1, width = 0.1, height = 0.2)+
# Add logo images to employment 3 bar
cowplot::draw_image(logo.SAS
,x = base.position.x.group.3
,y = base.position.y.group.3
,hjust = 1, vjust = 1, width = 0.1, height = 0.2)
# Save the plot as a png file
# Sizes of exported image should be set to be similar to the plotting window, modified by dev.new(width=, height=)
ggplot2::ggsave( filename = file.path(task.1.output.folder.path,"employment-history_programming-software-added.png")
,width = 10
,height = 5
,plot= print(gantt.chart.final) #
,device = "png")
#--------------------------------------------------
# Find and copy image file types
#--------------------------------------------------
# Input, output directory
task.2.input.folder.path <- file.path(dir.GLC.presentation,"images-download")
task.2.output.folder.path <- file.path(task.2.input.folder.path,"images-only") # dir.exists(task.2.output.folder.path)
#dir.create(task.2.output.folder.path)
image.files.want.source <- list.files( path = task.2.input.folder.path
,pattern = "\\.png$|\\.jpg$"
,full.names = TRUE) # length(image.files.want.source) 10
image.files.want.destination <- file.path(task.2.output.folder.path
,basename(image.files.want.source))
# Copy files. Destination files will have the same date times as source files
file.copy(from = image.files.want.source
,to = image.files.want.destination
,copy.date = TRUE)
# Copy files. Destination files will have the date times when copy is performed
## Delete copied files before running this code
file.copy(from = image.files.want.source
,to = image.files.want.destination
,copy.date = FALSE)
#-----------------------
# List folders as a tree
#-----------------------
# Input directories
dir.tmf <- file.path(dir.GLC.P.drive,"BIOSTATISTICS","Gina","tmf","Numbered")
dir.tmf.short <- file.path(dir.tmf, "Short_Names") # dir.exists(dir.tmf.short)
dir.tmf.long.stat <- file.path(dir.tmf,"Long_Names","Statistics") # dir.exists(dir.tmf.long.stat)
# List directories as a tree
fs::dir_tree(path = dir.tmf.short, recurse = TRUE)
fs::dir_tree(path = dir.tmf.long.stat, recurse = TRUE)
#--------------------------------------
# Create folders using existing folders
#--------------------------------------
# Get folder directories within dir.tmf.short
## If we set, recursive = TRUE, the directories are returned in hierarchical order, beginning with the main directory path and then followed by the sub-directories. However, if we specify recursive = FALSE, only the child directories are returned
folder.paths.tmf.short.child <- list.dirs(path = dir.tmf.short
,full.names = TRUE
,recursive = FALSE) # length(folder.paths.tmf.short.child) 6
# Get child, grandchild directories
folder.paths.tmf.short.grandchild <- list.dirs(path=folder.paths.tmf.short.child
,full.names = TRUE) # length(folder.paths.tmf.short.grandchild) 31
# Copy source trial master file folders to destination
# Create a new parent directory
dir.tmf.new.short <- file.path(dir.GLC.presentation,"trial-master-files","numbered","short-named")
dir.create(path = dir.tmf.new.short, recursive = TRUE)
# Get child and grandchild folder names
folder.names.tmf.short.grandchild <- folder.paths.tmf.short.grandchild %>%
# Remove parent directory
gsub(pattern="C:/Google-Drive/Greenlight-clinical/P-drive/BIOSTATISTICS/Gina/tmf/Numbered/Short_Names", replacement="")
# Create new destination folder paths
dest.folder.paths.tm.short <- paste0(dir.tmf.new.short, folder.names.tmf.short.grandchild) # length(dest.folder.paths.tm.short) 31
unlist(lapply(X=dest.folder.paths.tm.short
,FUN=function(x) dir.create(x, recursive = TRUE))
)
#-----------------------------------------------
# Create folders using eTMF reference model file
#-----------------------------------------------
eTMF <- readxl::read_excel(path = file.path(dir.GLC.data,"tmf-reference-model-version-3-0_16-june-20153.xlsx")
,sheet = "v3.0"
,skip = 2
,col_names = TRUE) %>%
# Select columns wanted by position
dplyr::select(c(1:6)) # dim(eTMF) 249 6
colnames(eTMF) <- colnames(eTMF) %>%
gsub(pattern=" #", replacement="_number") %>%
gsub(pattern=" ", replacement="_")
# Select Zone_Name="Statistics"
eTMF.stat <- eTMF %>%
dplyr::filter(Zone_Name=="Statistics") %>%
# Remove dots in *_number columns
dplyr::mutate(Section_number=gsub(pattern="\\.", replacement="",x=Section_number)
,Artifact_number=gsub(pattern="\\.", replacement="",x=Artifact_number)
,Zone_number_name=paste0(Zone_number,"_",Zone_Name)
,Section_number_name=paste0(Section_number,"_",Section_Name)
,Artifact_number_name=paste0(Artifact_number,"_",Artifact_name)
,folder.path=file.path(dir.eTMF, Zone_number_name, Section_number_name,Artifact_number_name)) # dim(eTMF.stat) 25 8
# Create new folders using the folder.path column
unlist(lapply(X=eTMF.stat$folder.path
,FUN=function(x) dir.create(x, recursive = TRUE))
)
# List directories as a tree
fs::dir_tree(path = file.path(dir.GLC.presentation,"eTMF","11_Statistics")
,recurse = TRUE)
#--------------------------------------------------
# Reduce file size of a single image
#--------------------------------------------------
task.3.input.folder.path <- file.path(dir.main, "Receipts-meals-not-claimed-yet") # dir.exists(task.3.input.folder.path)
task.3.output.folder.path <- file.path(dir.GLC.presentation,"reduced_images") # dir.exists(task.3.output.folder.path)
file.paths.jpg <- list.files(path=task.3.input.folder.path
,pattern = "*.jpg"
,full.names = TRUE) # length(file.paths.jpg) 98
# Reduce 1 jpg file
img <- jpeg::readJPEG(source=file.paths.jpg[2]
, native = TRUE) # class(img) [1] "nativeRaster"
# Export reduced object as a jpg file
output.file.path <-paste0(task.3.output.folder.path,"/small_",basename(file.paths.jpg[2]))
jpeg::writeJPEG(image=img
,target = output.file.path
, quality = 0.5)
#--------------------------------------------------
# Reduce file size of multiple image image files
#--------------------------------------------------
# cl <- parallel::makeCluster(parallel::detectCores()-1
# ,outfile=file.path(task.3.output.folder.path
# ,paste0("image-reduction_error-log.txt")
# )
# ) # Use all detected cores minus 1 core (CPU) as a cluster, leaving 1 core for software or app running in the background in case desktop crashes (monitor turns black and nothing working)
# doParallel::registerDoParallel(cl, cores = parallel::detectCores()-1)
# Loop thru all jpg images to reduce file sizes
# Check existence of input folder
if(dir.exists(task.3.input.folder.path)!=TRUE){
cat("Input image folder could not be found")
} else{
# Read input image file paths as a vector
image.file.paths <- list.files(path = task.3.input.folder.path
,full.names = TRUE
,pattern = "\\.jpg$") # length(image.file.paths) 99
cat("There are",length(image.file.paths),"images in the folder")
# Use image file name to look up data for the image in the cell segmentation data
foreach::foreach(i=1:length(image.file.paths)
,.combine = 'c'
,.packages=c("foreach","dplyr","jpeg")) %dopar% {
print(paste0("=========== Processing image ",i,"============"))
# If data can be found, then subset. If data cannot be found, then copy the input image to the output
image.file.path <-image.file.paths[i]
image.file.name <-basename(image.file.path)
# Name output file
output.file.path <-paste0(task.3.output.folder.path,"/small_", image.file.name)
# Read jpg file
img <- jpeg::readJPEG(source=image.file.path, native = TRUE) # class(img) [1] "nativeRaster"
# Reduce jpg file size and output jpg file
jpeg::writeJPEG(image=img
,target = output.file.path
, quality = 0.5)
}
}
# Calculate % reduction
total.file.size.original <- 339
total.file.size.reduced <- 59.2
(total.file.size.original-total.file.size.reduced)/total.file.size.original*100 #[1] 82.53687
#---------------------------------------------
# Google map link to a single QR code
#---------------------------------------------
url <- "http://www.google.com/maps/place/49.46800006494457,17.11514008755796"
# A function to plot QR code in changeable color
ggQRCode <- function(text, color="black", alpha=1) {
x <- qr_code(text)
x <- as.data.frame(x)
y <- x
y$id <- rownames(y)
y <- gather(y, "key", "value", colnames(y)[-ncol(y)])
y$key = factor(y$key, levels=rev(colnames(x)))
y$id = factor(y$id, levels=rev(rownames(x)))
ggplot(y, aes_(x=~id, y=~key)) + geom_tile(aes_(fill=~value), alpha=alpha) +
scale_fill_manual(values=c("white", color)) +
# scale_fill_gradient(low="white", high=color) +
theme_void() + theme(legend.position='none')
}
# QR code in black
plot.new()
QR.black <- ggQRCode(url,color="black")
plot(QR.black)
# QR code in a different color
QR.blue <- ggQRCode(url,color="#56B4E9")
plot(QR.blue)
#-------------------------------------------
# Brisbane bike path latitude and longitude to QR codes
#-------------------------------------------
task.32.input.folder.path <- file.path(dir.GLC.presentation, "data-download")
task.32.output.folder.path <- file.path(dir.GLC.presentation, "QR-codes")
# Read csv files
bikeway <- read.delim(file=file.path(task.32.input.folder.path,"open-data-bikeway-count-sites-2004-2021.csv")
,header = TRUE
,sep = ",")[,c(1:3,5,6)] # dim(bikeway) 183 5
# Rename columns
colnames(bikeway) <- c("Site.ID","Parent.Site.ID","Site.Name","Latitude","Longitude")
# Create a new column with Google maps URLs
bikeway.2 <- bikeway %>%
dplyr::mutate(Google.map.link=paste0("http://www.google.com/maps/place/",Latitude,",",Longitude))
# Create output file paths
task.32.output.file.paths <- file.path(task.32.output.folder.path, bikeway.2$Site.ID,".svg") # length(task.32.output.file.paths) 183
# Loop thru all jpg images to reduce file sizes
# Check existence of input folder
if(dir.exists(task.32.output.folder.path)!=TRUE){
cat("Output folder could not be found")
} else{
# Use image file name to look up data for the image in the cell segmentation data
foreach::foreach(i=1:length(bikeway.2$Site.ID)
,.combine = 'c'
,.packages=c("foreach","dplyr","ggplot2","qrcode","svglite","grid")) %dopar% {
print(paste0("=========== Creating QR code ",i,"============"))
# Get Google map URL
url <-bikeway.2$Google.map.link[i]
# Name output file
output.file.path <- paste0(task.32.output.folder.path,"/", bikeway.2$Site.ID[i],".svg")
# Create QR code
QR.code <- ggQRCode(url, color="#56B4E9")
# Add label to plot
QR.code.2 <- QR.code+ggplot2::annotation_custom(grid::textGrob(bikeway.2$Site.Name[i])
,xmin = -Inf
,xmax = Inf
,ymin = -Inf
,ymax = Inf)
# Save QR code as .svg file
ggplot2::ggsave( filename = output.file.path
,plot= print(QR.code.2)
,device = "svg")
}
}
The code above produces this plot
Feb 23, 2024Issue R files shown as 0 KB. R files reopened as empty in RStudio.Solution I had a similar issue with older R files that opened as empty. It turned out that RStudio didn’t use the correct encoding as default and therefore wasn’t able to read the file (presented the file as empty). You can make sure that you are using the correct encoding by:0. Copying R code to a text file
Dec 2, 2023ERROR An error occurred executing the workspace job “autoexec”. SDS Failed to provide the SAS workspace. SAS.EC.Directory.Model.SDSEXception
Dec 1, 2023Chang’s working examples created in R.
Nov 28, 2023or
By clicking below, you agree to our terms of service.
New to HackMD? Sign up