docker container --help
docker image ls
docker image pull hello-world
docker image ls
docker container run hello-world
docker image pull alpine
docker container run alpine cat /etc/os-release
docker container run -it alpine sh #-it: interactive terminal, sh: the shell to run (Alpine has no bash by default)
ls run
ls bin
exit
docker image rm hello-world
docker container ls #view running
docker container ls -a #view all
docker container rm [Container ID]
docker image rm hello-world
docker container ls -a
docker container prune #WARNING: removes all stopped containers
docker image pull python #will pull latest version
docker image pull python:3.11-alpine
docker image pull python:3.11-alpine
docker container run -it python:3.11-alpine
a = 1
b=2
a+b
quit()
nano Dockerfile #must name file Dockerfile
# Alpine-based image with Python 3, pip and the Python development headers.
FROM alpine
# Alpine's package manager is apk (not apt); --update refreshes the package index first.
RUN apk add --update python3 py3-pip python3-dev
RUN pip install cython
# Default command: print the Python version when a container is started from this image.
CMD ["python3", "--version"]
ls
cat Dockerfile
docker image build -t alpine-python:0.1.0 .
docker image ls
docker container run alpine-python:0.1.0
docker image pull ghcr.io/imageomics/dataverse-access
docker image ls
docker tag ghcr.io/imageomics/dataverse-access dva-image #rename for easier use, dva = dataverse access
docker image ls #note the names just point to same thing
docker container run dva-image
docker container run -v "$(pwd)/data:/data" -it dva-image dva download doi:10.5072/FK2/B7LCCX #alt --volume, $(pwd)...places in current working directory to new folder called data
docker run dva-image dva ls doi:10.5072/FK2/B7LCCX --url https://datacommons.tdai.osu.edu/ #see what's there before we download to computer
Python with a twist
pwd #make sure we're in the right place
cp -r /fs/ess/PAS2136/Workshops/Snakemake/files/* .
ls
mkdir -p .snakemake/singularity
cp /fs/ess/PAS2136/Workshops/Snakemake/singularity_images/* .snakemake/singularity/.
sinteractive -t 01:00:00 -A PAS2136 #open up room to work (specific to OSC), now on worker node
. Scripts/setup_env.sh #set up work environment
snakemake --version
head -n 11 multimedia.csv #only want first 11 lines of the csv (10 data rows plus one header line)
head -n 11 multimedia.csv > reduce/multimedia.csv
nano Snakefile
# Keep only the first 11 lines of multimedia.csv (one header line plus 10 data rows).
rule reduce:
    input: "multimedia.csv"
    output: "reduce/multimedia.csv"
    # {input} and {output} are filled in by Snakemake from the directives above.
    shell: "head -n 11 {input} > {output}"
shell will then fill in to do: "head -n 11 multimedia.csv > reduce/multimedia.csv"
snakemake #need to tell it how many cores to use (add -c1)
snakemake -c1
snakemake -c1 #nothing to be done, already there and working
cat Snakefile #shows file content
ls
ls reduce/ #tells you what made it
nano Snakefile #edit to 21
snakemake -c1 #will remake file now since it's been edited
nano Snakefile #back to 11
snakemake -c1 --dry-run #tells what it would do, but doesn't run
#rule is like a recipe, job is an ingredient
snakemake --help
nano Snakefile
# Same reduction as before, with the line count taken from params instead of hard-coded.
rule reduce:
    input: "multimedia.csv"
    params:
        rows = "11",
        type = "fish"  # declared but not referenced by the shell command below
    output: "reduce/multimedia.csv"
    shell: "head -n {params.rows} {input} > {output}"
snakemake -c1
head Scripts/FilterImagesHardCoded.R
nano Snakefile
# Keep only the first {params.lines} lines of multimedia.csv.
rule reduce:
    input: "multimedia.csv"
    params: lines = "11"
    output: "reduce/multimedia.csv"
    shell: "head -n {params.lines} {input} > {output}"

# Run the R filtering script on the reduced CSV.
rule filter:
    input:
        script = "Scripts/FilterImagesHardCoded.R",
        # Not passed to the command; listing it here makes Snakemake build
        # reduce/multimedia.csv (via rule reduce) before this rule runs.
        fishes = "reduce/multimedia.csv"
    output: "filter/multimedia.csv"
    shell: "Rscript {input.script}"
snakemake -c1 #will automatically run the first rule (if it wouldn't run, doesn't look further)
snakemake -c1 filter/multimedia.csv
ls filter/
nano Snakefile
# First rule in the file, so a bare `snakemake -c1` targets it; requesting the
# filtered CSV pulls in rule filter and, through its inputs, rule reduce.
rule all:
    input: "filter/multimedia.csv"

# Keep only the first {params.lines} lines of multimedia.csv.
rule reduce:
    input: "multimedia.csv"
    params: lines = "11"
    output: "reduce/multimedia.csv"
    shell: "head -n {params.lines} {input} > {output}"

# Run the R filtering script on the reduced CSV.
rule filter:
    input:
        script = "Scripts/FilterImagesHardCoded.R",
        # Not passed to the command; declared so Snakemake builds it first.
        fishes = "reduce/multimedia.csv"
    output: "filter/multimedia.csv"
    shell: "Rscript {input.script}"
Note: fishes not used directly, but alerts it to go look for reduce/multimedia.csv, snakemake checks inputs first ("checking dependencies")
snakemake -c1
cat Scripts/FilterImagesHardCoded.R
nano config.yaml
# Path of the reduced CSV; read in the Snakefile as config["reduce_multimedia"].
reduce_multimedia: "reduce/multimedia.csv"
# Path of the filtered CSV; read in the Snakefile as config["filter_multimedia"].
filter_multimedia: "filter/multimedia.csv"
cat Scripts/FilterImages.R #to read config file
nano Snakefile
# Load config.yaml; its keys are available below through the `config` dictionary.
configfile: "config.yaml"

# Default target: the filtered CSV.
rule all:
    input: "filter/multimedia.csv"

# Keep only the first {params.lines} lines of multimedia.csv.
rule reduce:
    input: "multimedia.csv"
    params: lines = "11"
    output: "reduce/multimedia.csv"
    shell: "head -n {params.lines} {input} > {output}"

# Run the R filtering script; input and output paths now come from config.yaml.
rule filter:
    input:
        script = "Scripts/FilterImagesHardCoded.R",
        fishes = config["reduce_multimedia"]
    output: config["filter_multimedia"]
    shell: "Rscript {input.script}"
Note: could use
snakemake -c1 --config reduce_multimedia=blah filter_multimedia=blah2
if changing values for a single run, but it is better to make a config file
snakemake -c1
wget -O test.jpg https://bgnn.tulane.edu/hdr-share/ftp/ark/89609/GLIN/FMNH/dd216t3d.jpg
nano Snakefile
Use arkID for unique naming across sites!
# Load config.yaml; its keys are available below through the `config` dictionary.
configfile: "config.yaml"

# Default target: the filtered CSV.
rule all:
    input: "filter/multimedia.csv"

# Keep only the first {params.lines} lines of multimedia.csv.
rule reduce:
    input: "multimedia.csv"
    params: lines = "11"
    output: "reduce/multimedia.csv"
    shell: "head -n {params.lines} {input} > {output}"

# Run the R filtering script; input and output paths come from config.yaml.
rule filter:
    input:
        script = "Scripts/FilterImagesHardCoded.R",
        fishes = config["reduce_multimedia"]
    output: config["filter_multimedia"]
    shell: "Rscript {input.script}"

# Params function: returns the URL of the single hard-coded image.
# `wildcards` is supplied by Snakemake and is not used in this version.
def get_image_url(wildcards):
    base_image_url = "https://bgnn.tulane.edu/hdr-share/ftp/ark/89609/GLIN/FMNH/"
    return base_image_url + "dd216t3d.jpg"

# Download exactly one image (dd216t3d.jpg) into Images/.
rule download_image:
    params: url=get_image_url
    output: "Images/dd216t3d.jpg"
    shell: "wget -O {output} {params.url}"
Note: this rule will only download one image.
sinteractive -t 01:00:00 -A PAS2136
. Scripts/setup_env.sh
Making image download rule generic to download a single specified image and rename.
# Load config.yaml; its keys are available below through the `config` dictionary.
configfile: "config.yaml"

# Default target: the filtered CSV.
rule all:
    input: "filter/multimedia.csv"

# Keep only the first {params.lines} lines of multimedia.csv.
rule reduce:
    input: "multimedia.csv"
    params: lines = "11"
    output: "reduce/multimedia.csv"
    shell: "head -n {params.lines} {input} > {output}"

# Run the R filtering script; input and output paths come from config.yaml.
rule filter:
    input:
        script = "Scripts/FilterImagesHardCoded.R",
        fishes = config["reduce_multimedia"]
    output: config["filter_multimedia"]
    shell: "Rscript {input.script}"

# Params function: builds the image URL from the ark_id wildcard that Snakemake
# extracted from the requested output path.
def get_image_url(wildcards):
    base_image_url = "https://bgnn.tulane.edu/hdr-share/ftp/ark/89609/GLIN/FMNH/"
    return base_image_url + wildcards.ark_id + ".jpg"

# Download any image requested as Images/<ark_id>.jpg.
rule download_image:
    params: url=get_image_url
    output: "Images/{ark_id}.jpg"
    shell: "wget -O {output} {params.url}"
The wildcards argument of get_image_url is filled in from the {ark_id} placeholder in the output of the download_image rule, which is how Snakemake knows what to put in wildcards.ark_id.
snakemake -c1 Images/dd216t3d.jpg
nano Snakefile
Let's download and process the images. Enter the Pandas!
import pandas as pd

# Load config.yaml; its keys are available below through the `config` dictionary.
configfile: "config.yaml"

# Default target: the filtered CSV.
rule all:
    input: "filter/multimedia.csv"

# Keep only the first {params.lines} lines of multimedia.csv.
rule reduce:
    input: "multimedia.csv"
    params: lines = "11"
    output: "reduce/multimedia.csv"
    shell: "head -n {params.lines} {input} > {output}"

# Run the R filtering script; input and output paths come from config.yaml.
rule filter:
    input:
        script = "Scripts/FilterImagesHardCoded.R",
        fishes = config["reduce_multimedia"]
    output: config["filter_multimedia"]
    shell: "Rscript {input.script}"

# Params function: builds the image URL from the ark_id wildcard.
def get_image_url(wildcards):
    filename = "multimedia.csv"
    # NOTE(review): df is loaded but not used below; presumably the URL was
    # meant to be looked up from the row matching wildcards.ark_id - confirm.
    df = pd.read_csv(filename)
    base_image_url = "https://bgnn.tulane.edu/hdr-share/ftp/ark/89609/GLIN/FMNH/"
    return base_image_url + wildcards.ark_id + ".jpg"

# Download any image requested as Images/<ark_id>.jpg.
rule download_image:
    params: url=get_image_url
    output: "Images/{ark_id}.jpg"
    shell: "wget -O {output} {params.url}"
snakemake -c1 Images/hd529k3h.jpg
Change the rule all:
# Input function for rule all: one Images/<ark_id>.jpg target per value in the
# arkID column of the filtered CSV.
def get_image_filenames(wildcards):
    # The filtered CSV is read when this function is evaluated, so it must
    # already exist on disk at that point.
    filename = config["filter_multimedia"]
    df = pd.read_csv(filename)
    ark_ids = df["arkID"].tolist()
    return expand("Images/{ark_id}.jpg", ark_id=ark_ids)

# Default target: every image listed in the filtered CSV.
rule all:
    input: get_image_filenames
snakemake -c1
To start from scratch:
rm -r filter/ reduce/
Change the get_image_filenames function (and change "rule filter:" to "checkpoint filter:" so that checkpoints.filter exists):
# Input function for rule all: one Images/<ark_id>.jpg target per value in the
# arkID column of the filtered CSV.
# This is an ordinary Python function, so it is declared with `def`; the
# `checkpoint` keyword belongs on the filter rule (`checkpoint filter:` instead
# of `rule filter:`), which is what makes `checkpoints.filter` available here.
def get_image_filenames(wildcards):
    # .get() makes Snakemake postpone evaluating this function until the filter
    # checkpoint has produced its output, so the CSV exists when it is read.
    filename = checkpoints.filter.get().output[0]
    df = pd.read_csv(filename)
    ark_ids = df["arkID"].tolist()
    return expand("Images/{ark_id}.jpg", ark_id=ark_ids)
snakemake -c1
docker pull alpine:3
singularity pull docker://alpine:3
# Download any image requested as Images/<ark_id>.jpg, running wget inside a
# container image (used when snakemake is invoked with --use-singularity).
rule download_image:
    params: url=get_image_url
    output: 'Images/{ark_id}.jpg'
    container: "docker://quay.io/biocontainers/gnu-wget:1.18--h60da905_7"
    shell: "wget -O {output} {params.url}"
rm Images/hd529k3h.jpg
snakemake -c1 --use-singularity Images/hd529k3h.jpg
# Import the BGNN core workflow (pinned to tag 1.0.0) from GitHub as a module.
module bgnn_core:
    snakefile:
        github("hdr-bgnn/BGNN_Core_Workflow", path="workflow/Snakefile", tag="1.0.0")

# Make the module's generate_metadata rule available in this workflow.
use rule generate_metadata from bgnn_core
snakemake -c1 --use-singularity DrexelMetadata/bj373514.json
# Import the BGNN core workflow (pinned to tag 1.0.0) from GitHub as a module.
module bgnn_core:
    snakefile:
        github("hdr-bgnn/BGNN_Core_Workflow", path="workflow/Snakefile", tag="1.0.0")

# Make these rules from the module available in this workflow.
use rule generate_metadata from bgnn_core
use rule transform_metadata from bgnn_core
use rule crop_image from bgnn_core
use rule segment_image from bgnn_core