## nf-core/mag and galaxy benchmarking comparison
CAMI II plant-associated dataset: https://cami-challenge.org/datasets/Plant-associated/
## Sequencing Data
Download the CAMI2 plant-associated dataset (short and long reads)
```bash
# Download and unpack the CAMI2 plant-associated reads (samples 0-20).
# NOTE: the `aria2` conda environment used here is created in the "Databases"
# section (conda create -n aria2 -c conda-forge aria2); create it first if needed.
conda activate aria2

# Short reads: brace expansion hands wget one URL per sample.
mkdir -p short_read/ && cd short_read/
wget https://frl.publisso.de/data/frl:6425521/plant_associated/short_read/rhimgCAMI2_sample_{0..20}_reads.tar.gz
for i in *.gz; do tar xzf "$i"; done
cd ../

# Long reads (long_read_nano): fetched one sample at a time.
mkdir -p long_read/ && cd long_read/
## Started having intermittent connection issues, switch to aria2c
for i in {0..20}; do echo "$i"; aria2c "https://frl.publisso.de/data/frl:6425521/plant_associated/long_read_nano/rhimgCAMI2_sample_${i}_reads.tar.gz"; done
for i in *.gz; do tar xzf "$i"; done
# Return to the parent directory, mirroring the short-read section.
cd ../
conda deactivate
```
Prepare samplesheet
```csv
sample,group,short_reads_1,short_reads_2,long_reads,short_reads_platform,long_reads_platform
sample_0,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_0/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_0/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE
sample_10,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_10/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_10/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE
sample_11,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_11/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_11/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE
sample_12,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_12/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_12/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE
sample_13,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_13/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_13/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE
sample_14,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_14/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_14/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE
sample_15,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_15/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_15/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE
sample_16,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_16/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_16/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE
sample_17,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_17/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_17/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE
sample_18,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_18/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_18/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE
sample_19,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_19/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_19/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE
sample_1,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_1/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_1/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE
sample_20,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_20/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_20/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE
sample_2,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_2/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_2/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE
sample_3,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_3/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_3/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE
sample_4,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_4/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_4/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE
sample_5,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_5/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_5/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE
sample_6,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_6/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_6/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE
sample_7,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_7/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_7/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE
sample_8,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_8/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_8/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE
sample_9,0,/ptmp/jfellowsy/databases/CAMI/plant_associated/short_read/simulation_short_read/2019.09.27_13.59.10_sample_9/reads/anonymous_reads.fq.gz,,/ptmp/jfellowsy/databases/CAMI/plant_associated/long_read/simulation_nanosim/2020.01.23_15.51.11_sample_9/reads/anonymous_reads.fq.gz,ILLUMINA,OXFORD_NANOPORE
```
## Databases
Download the GTDB-Tk database (for better taxonomic classification of microbial bins)
```bash
# Create an environment providing aria2c, then fetch and unpack the
# GTDB-Tk r226 data package into the current directory.
conda create --name aria2 --channel conda-forge aria2
conda activate aria2
aria2c https://data.gtdb.aau.ecogenomic.org/releases/release226/226.0/auxillary_files/gtdbtk_package/full_package/gtdbtk_r226_data.tar.gz
tar -xzf gtdbtk_r226_data.tar.gz
conda deactivate
```
Download CAT_pack (because we may have more than just bacteria)
```bash
# Fetch and unpack the pre-built CAT_pack nr database.
# NOTE: --no-check-certificate disables TLS certificate verification for this download.
wget --no-check-certificate https://tbb.bio.uu.nl/tina/CAT_pack_prepare/20241212_CAT_nr.tar.gz
tar -xzvf 20241212_CAT_nr.tar.gz
```
Download Checkm2 DB
```bash
# Install CheckM2 in its own environment and use its built-in downloader,
# writing the database to the current directory.
conda create --name checkm2 --channel bioconda checkm2
conda activate checkm2
checkm2 database --download --path .
conda deactivate
```
Download MetaEuk / NR
```bash
# Install MetaEuk and download the NR database.
conda create -n metaeuk -c bioconda metaeuk
# The environment name must match the one created on the previous line.
conda activate metaeuk
mkdir -p nr
# Arguments: database selection (NR), output database path (nr/NR), temp dir (tmp).
metaeuk databases NR nr/NR tmp --threads 2
conda deactivate
```
Download genomad database
```bash
# Fetch and unpack the geNomad v1.9 database tarball from Zenodo.
wget https://zenodo.org/records/14886553/files/genomad_db_v1.9.tar.gz
tar -xzvf genomad_db_v1.9.tar.gz
```
~~Get host removal~~ We don't have any host info, so can't do this
GUNC
```bash
# Install GUNC in its own environment and download its database
# into the current directory.
conda create --name gunc --channel bioconda gunc
conda activate gunc
gunc download_db .
conda deactivate
```
CheckM
```bash
# Download and unpack the CheckM reference data into checkm/.
mkdir -p checkm && cd checkm
# Fetch the tarball itself: the record landing-page URL
# (https://zenodo.org/record/7401545#.Y44ymHbMJD8) only returns an HTML page.
wget https://zenodo.org/records/7401545/files/checkm_data_2015_01_16.tar.gz
tar xzf checkm_data_2015_01_16.tar.gz
cd ../
```
## Execute
TODO: fix `--checkm2_db`
```bash
# Load Apptainer and activate the nf-core conda environment, then launch nf-core/mag 5.3.0.
# NOTE(review): --checkm_db points at .../databases, whereas the CheckM download
#   step works inside a checkm/ subdirectory; confirm the intended path.
# NOTE(review): --busco_db uses a /raven/ptmp prefix while every other path uses /ptmp; confirm.
# NOTE(review): the TODO mentions --checkm2_db, but no such option is passed below.
module load apptainer/1.4.3
conda activate nf-core
nextflow run nf-core/mag -r 5.3.0 -profile mpcdf,raven \
    --input /ptmp/jfellowsy/nextflow/mag/galaxy_benchmark/data/samplesheet.csv \
    --outdir /ptmp/jfellowsy/nextflow/mag/galaxy_benchmark/results/ \
    -w /ptmp/jfellowsy/nextflow/mag/galaxy_benchmark/work \
    --single_end \
    --longread_adaptertrimming_tool porechop \
    --skip_clipping \
    --cat_db /ptmp/jfellowsy/nextflow/mag/galaxy_benchmark/databases/20241212_CAT_nr_website \
    --gtdb_db /ptmp/jfellowsy/nextflow/mag/galaxy_benchmark/databases/release226 \
    --metaeuk_db /ptmp/jfellowsy/nextflow/mag/galaxy_benchmark/databases/metaeuk/nr \
    --run_virus_identification \
    --genomad_db /ptmp/jfellowsy/nextflow/mag/galaxy_benchmark/databases/genomad/genomad_db/ \
    --exclude_unbins_from_postbinning \
    --refine_bins_dastool \
    --postbinning_input both \
    --run_gunc \
    --gunc_db /ptmp/jfellowsy/nextflow/mag/galaxy_benchmark/databases/gunc/gunc_db_progenomes2.1.dmnd \
    --run_checkm \
    --checkm_db /ptmp/jfellowsy/nextflow/mag/galaxy_benchmark/databases \
    --run_busco \
    --busco_db /raven/ptmp/jfellowsy/nextflow/mag/galaxy_benchmark/databases/busco_downloads
```