Try   HackMD
tags: R Bash Rscript --vanilla commandArgs()

Run an R script file as a batch of PBS jobs on an HPC cluster

Part 1: Set up a shell file that runs part 2 as a submitted PBS job

# Main folders on the home file system, plus the job script (part 2) that
# every qsub call below submits
homeDir="/mnt/backedup/home/lunC"
locScripts="${homeDir}/scripts/MR_ICC_GSCAN_201806"
locHistory="${homeDir}/history"
jobScriptFilePath="${locScripts}/MR_step06-03-03_run-R-script-via-bash.sh"

# Main folders on the working file system
workingDir="/mnt/lustre/working/lab_nickm/lunC"
locMR="${workingDir}/MR_ICC_GSCAN_201806"

# Input folder
loc_input="${locMR}/two-sample-MR/input"
# Output folders
loc_output="${loc_input}/harmonised-data"
loc_MR_result="${locMR}/two-sample-MR/output"
pbs_output_dir="${loc_output}/pbs_output"

# Resources to request for each PBS job
# NOTE: these three values and pbs_output_dir are defined here but are not
# referenced by the qsub call in this snippet.
num_cpu=1
runTime_requested=10:00:00
memory_requested=5gb

# The two TSV files whose data lines (header row excluded) drive the loops
filePath_tsv_1="${loc_input}/file-info_exposure-clumped-GWASs.tsv"
filePath_tsv_2="${loc_input}/file-info_outcome-QCed-GWASs.tsv"

# Submit one PBS job per pair of lines (line of file 1 x line of file 2)
## The 3 variables listed after qsub -v are passed to part 2, where they are
## read as 3 shell variables.
## NOTE: part1_variable1..3 are placeholders; nothing in this snippet assigns
## them, so set them (e.g. from lineF1/lineF2) before the qsub call.
## Splitting on newlines only makes each loop iteration receive one whole line.
IFS=$'\n'
count=0  # not used anywhere in this snippet
for lineF1 in $(tail -n+2 "${filePath_tsv_1}"); do
    for lineF2 in $(tail -n+2 "${filePath_tsv_2}"); do
        qsub -v "v_part1_variable1=${part1_variable1},v_part1_variable2=${part1_variable2},v_part1_variable3=${part1_variable3}" "${jobScriptFilePath}"
    done
done

Part 2: Set up a shell file that runs part 3 using Rscript --vanilla

#!/bin/bash

# Pass qsub -v variables from part 1 to shell variables in this file
part2_variable_1="${v_part1_variable1}"
part2_variable_2="${v_part1_variable2}"
part2_variable_3="${v_part1_variable3}"

# Set up directory
locScripts="/mnt/backedup/home/lunC/scripts/PRS_UKB_201711"
RScriptFileName="PRS_UKB_201711_step21-05-02_jobScript_2VarACE_genetic-corr-between-SUD-and-SUD-QIMR-adults.R"
RScriptFilePath="${locScripts}/${RScriptFileName}"

# Load software R in order to run an R file through the Rscript command
module load R/3.4.1

# Run an R script using the Rscript command
## ${RScriptFilePath} : path of the R script file to run
## The 3 values after the path are passed into the R script file as
## positional arguments. They are quoted so that an empty value or a value
## containing whitespace still arrives as exactly one argument, in the
## position the R script expects.
Rscript --vanilla "${RScriptFilePath}" "${part2_variable_1}" "${part2_variable_2}" "${part2_variable_3}"

Part 3: Set up the R script file.

#!/usr/bin/env Rscript

#---------------------------------------------
# Part A: Get arguments specified in part 2
#---------------------------------------------
# trailingOnly = TRUE keeps only the arguments supplied after the script path
# on the Rscript command line.
arguments.passed.bash.to.R <- commandArgs(trailingOnly = TRUE)
print(arguments.passed.bash.to.R)

# Part B reads three positional arguments, so stop early with a clear message
# when fewer than three were supplied.
num.arguments.expected <- 3L
if (length(arguments.passed.bash.to.R) < num.arguments.expected) {
  stop(
    "Expected ", num.arguments.expected, " arguments but received ",
    length(arguments.passed.bash.to.R),
    call. = FALSE
  )
}

#---------------------------------------------
# Part B: Extract individual elements of the vector of arguments
#---------------------------------------------
print("Passing arguments from commandline to R")
part3.variable.1 <- arguments.passed.bash.to.R[[1]]
part3.variable.2 <- arguments.passed.bash.to.R[[2]]
part3.variable.3 <- arguments.passed.bash.to.R[[3]]

#---------------------------------------------
# Part C: Testing code with fixed values for the variables
# NOTE: while these lines are active they overwrite the values extracted
# in part B.
#---------------------------------------------
part3.variable.1 <- "A.1"
part3.variable.2 <- "A.2"
part3.variable.3 <- "A.3"

#-----------------------------------------------------
# Part D: Run the R code below using the variables above
# When the code is fully tested, comment out part C so that the values
# extracted in part B are used.
#-----------------------------------------------------