## nf-core/mag and galaxy benchmarking comparison CAMI II plant-associated: https://cami-challenge.org/datasets/Plant-associated/ ## Sequencing Data Download CAMI2 Plant associated ```bash conda activate aria2 mkdir short_read/ && cd short_read/ wget https://frl.publisso.de/data/frl:6425521/plant_associated/short_read/rhimgCAMI2_sample_{0..20}_reads.tar.gz for i in *.gz; do tar xzf $i; done cd ../ mkdir long_read/ && cd long_read/ ## Started having intermittent connection issues, switched to aria2c for i in {0..20}; do echo $i; aria2c "https://frl.publisso.de/data/frl:6425521/plant_associated/long_read_nano/rhimgCAMI2_sample_${i}_reads.tar.gz"; done for i in *.gz; do tar xzf $i; done conda deactivate ``` Prepare samplesheet ```csv sample,group,short_reads_1,short_reads_2,long_reads,short_reads_platform,long_reads_platform sample_0,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_0/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_0/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE sample_10,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_10/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_10/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE sample_11,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_11/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_11/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE 
sample_12,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_12/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_12/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE sample_13,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_13/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_13/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE sample_14,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_14/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_14/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE sample_15,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_15/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_15/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE sample_16,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_16/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_16/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE sample_17,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_17/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_17/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE 
sample_18,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_18/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_18/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE sample_19,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_19/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_19/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE sample_1,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_1/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_1/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE sample_20,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_20/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_20/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE sample_2,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_2/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_2/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE sample_3,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_3/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_3/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE 
sample_4,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_4/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_4/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE sample_5,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_5/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_5/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE sample_6,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_6/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_6/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE sample_7,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_7/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_7/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE sample_8,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_8/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_8/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE sample_9,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_9/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_9/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE ``` ## Databases Download GTDB DB (for better microbes) ```bash conda create -n aria2 -c conda-forge aria2 conda activate aria2 aria2c 
https://data.gtdb.aau.ecogenomic.org/releases/release226/226.0/auxillary_files/gtdbtk_package/full_package/gtdbtk_r226_data.tar.gz tar xzf gtdbtk_r226_data.tar.gz conda deactivate ``` Download CAT_pack (because we may have more than just bacteria) ```bash wget https://tbb.bio.uu.nl/tina/CAT_pack_prepare/20241212_CAT_nr.tar.gz --no-check-certificate tar xzvf 20241212_CAT_nr.tar.gz ``` Download Checkm2 DB ```bash conda create -n checkm2 -c bioconda checkm2 conda activate checkm2 checkm2 database --download --path . conda deactivate ``` Download MetaEuk / NR ```bash conda create -n metaeuk -c bioconda metaeuk conda activate metaeuk mkdir nr metaeuk databases NR nr/NR tmp --threads 2 conda deactivate ``` Download genomad database ```bash wget https://zenodo.org/records/14886553/files/genomad_db_v1.9.tar.gz tar xzvf genomad_db_v1.9.tar.gz ``` ~~Get host removal~~ We don't have any host info, so can't do this GUNC ```bash conda create -n gunc -c bioconda gunc conda activate gunc gunc download_db . 
conda deactivate ``` CheckM ```bash mkdir checkm cd checkm wget https://zenodo.org/records/7401545/files/checkm_data_2015_01_16.tar.gz tar xzf checkm_data_2015_01_16.tar.gz cd ../ ``` ## Execute TODO: fix `--checkm2_db` ```bash module load apptainer/1.4.3 conda activate nf-core nextflow run nf-core/mag -r 5.3.0 -profile mpcdf,raven \ --input /ptmp/jfellowsy/nextflow/mag/galaxy_benchmark/data/samplesheet.csv \ --outdir /ptmp/jfellowsy/nextflow/mag/galaxy_benchmark/results/ -w /ptmp/jfellowsy/nextflow/mag/galaxy_benchmark/work \ --single_end \ --longread_adaptertrimming_tool porechop \ --skip_clipping \ --cat_db /ptmp/jfellowsy/nextflow/mag/galaxy_benchmark/databases/20241212_CAT_nr_website \ --gtdb_db /ptmp/jfellowsy/nextflow/mag/galaxy_benchmark/databases/release226 \ --metaeuk_db /ptmp/jfellowsy/nextflow/mag/galaxy_benchmark/databases/metaeuk/nr \ --run_virus_identification \ --genomad_db /ptmp/jfellowsy/nextflow/mag/galaxy_benchmark/databases/genomad/genomad_db/ \ --exclude_unbins_from_postbinning \ --refine_bins_dastool \ --postbinning_input both \ --run_gunc \ --gunc_db /ptmp/jfellowsy/nextflow/mag/galaxy_benchmark/databases/gunc/gunc_db_progenomes2.1.dmnd \ --run_checkm \ --checkm_db /ptmp/jfellowsy/nextflow/mag/galaxy_benchmark/databases \ --run_busco \ --busco_db /raven/ptmp/jfellowsy/nextflow/mag/galaxy_benchmark/databases/busco_downloads ```