# Final Exam
### Question 1:
1) Create an environment named `BCH709_final`.
2) Install all the required software within this environment.
3) After installation, use `conda export` to generate an export of the environment, and save it as `final.yml`
4) Please upload the `final.yml` file along with the command line history of the installation process
```
<Create BCH709_final conda environment and install packages>
# Environment names are case-sensitive: create it as BCH709_final so the later
# `conda env export -n BCH709_final` and `conda activate BCH709_final` resolve to this environment.
conda create -y -n BCH709_final -c bioconda -c conda-forge -c anaconda mamba STAR multiqc samtools trinity=2.13.2 bioconductor-qvalue sambamba graphviz gffread tpmcalculator lxml subread=2.0.1 trim-galore=0.6.7
# Target the environment explicitly with -n; without it the package is installed into whichever environment is currently active.
conda install -y -n BCH709_final -c bioconda bioconductor-tximport
<Export BCH709_final as final.yml>
conda env export -n BCH709_final > final.yml
cat final.yml
name: BCH709_final
channels:
- anaconda
- bioconda
- conda-forge
- defaults
dependencies:
- _libgcc_mutex=0.1=conda_forge
- _openmp_mutex=4.5=2_gnu
- _r-mutex=1.0.1=anacondar_1
- alsa-lib=1.2.8=h166bdaf_0
- argcomplete=3.0.8=pyhd8ed1ab_0
- atk-1.0=2.38.0=hd4edc92_1
- bamtools=2.5.1=hd03093a_10
- binutils_impl_linux-64=2.40=hf600244_0
- bioconductor-annotate=1.76.0=r42hdfd78af_0
- bioconductor-annotationdbi=1.60.0=r42hdfd78af_0
- bioconductor-biobase=2.58.0=r42ha9d7317_1
- bioconductor-biocfilecache=2.6.0=r42hdfd78af_0
- bioconductor-biocgenerics=0.44.0=r42hdfd78af_0
- bioconductor-biocio=1.8.0=r42hdfd78af_0
- bioconductor-biocparallel=1.32.5=r42hf17093f_1
- bioconductor-biomart=2.54.0=r42hdfd78af_0
- bioconductor-biostrings=2.66.0=r42ha9d7317_1
- bioconductor-ctc=1.72.0=r42hdfd78af_0
- bioconductor-data-packages=20231203=hdfd78af_0
- bioconductor-delayedarray=0.24.0=r42ha9d7317_1
- bioconductor-deseq2=1.38.0=r42hf17093f_1
- bioconductor-dexseq=1.44.0=r42hdfd78af_2
- bioconductor-edger=3.40.0=r42hf17093f_1
- bioconductor-genefilter=1.80.0=r42ha1e849b_1
- bioconductor-genelendatabase=1.34.0=r42hdfd78af_0
- bioconductor-geneplotter=1.76.0=r42hdfd78af_0
- bioconductor-genomeinfodb=1.34.9=r42hdfd78af_0
- bioconductor-genomeinfodbdata=1.2.9=r42hdfd78af_0
- bioconductor-genomicalignments=1.34.0=r42ha9d7317_1
- bioconductor-genomicfeatures=1.50.2=r42hdfd78af_0
- bioconductor-genomicranges=1.50.0=r42ha9d7317_1
- bioconductor-go.db=3.16.0=r42hdfd78af_0
- bioconductor-goseq=1.50.0=r42hdfd78af_0
- bioconductor-iranges=2.32.0=r42ha9d7317_1
- bioconductor-keggrest=1.38.0=r42hdfd78af_0
- bioconductor-limma=3.54.0=r42ha9d7317_1
- bioconductor-matrixgenerics=1.10.0=r42hdfd78af_0
- bioconductor-qvalue=2.30.0=r42hdfd78af_0
- bioconductor-rhtslib=2.0.0=r42ha9d7317_1
- bioconductor-rsamtools=2.14.0=r42hf17093f_1
- bioconductor-rtracklayer=1.58.0=r42h171f361_1
- bioconductor-s4vectors=0.36.0=r42ha9d7317_1
- bioconductor-summarizedexperiment=1.28.0=r42hdfd78af_0
- bioconductor-tximport=1.26.0=r42hdfd78af_0
- bioconductor-xvector=0.38.0=r42ha9d7317_1
- bioconductor-zlibbioc=1.44.0=r42ha9d7317_2
- boost-cpp=1.78.0=h5adbc97_2
- bottleneck=1.3.5=py37hda87dfa_0
- bowtie=1.3.1=py37h24c3a67_5
- bowtie2=2.5.1=py37hb24965f_1
- brotli=1.1.0=hd590300_1
- brotli-bin=1.1.0=hd590300_1
- brotli-python=1.0.9=py37hd23a5d3_7
- bwidget=1.9.14=ha770c72_1
- bzip2=1.0.8=hd590300_5
- c-ares=1.23.0=hd590300_0
- ca-certificates=2023.11.17=hbcca054_0
- cairo=1.16.0=ha61ee94_1014
- certifi=2023.11.17=pyhd8ed1ab_0
- cffi=1.15.1=py37h43b0acd_1
- charset-normalizer=3.3.2=pyhd8ed1ab_0
- click=8.1.3=py37h89c1867_0
- coloredlogs=15.0.1=pyhd8ed1ab_3
- colormath=3.0.0=py_2
- conda=22.9.0=py37h89c1867_1
- conda-package-handling=2.2.0=pyh38be061_0
- conda-package-streaming=0.9.0=pyhd8ed1ab_0
- coreutils=8.25=1
- cryptography=38.0.2=py37h38fbfac_1
- curl=7.87.0=h6312ad2_0
- cutadapt=4.4=py37h8902056_0
- cycler=0.11.0=pyhd8ed1ab_0
- dnaio=0.10.0=py37h8902056_1
- expat=2.5.0=hcb278e6_1
- fastqc=0.12.1=hdfd78af_0
- font-ttf-dejavu-sans-mono=2.37=hab24e00_0
- font-ttf-inconsolata=3.000=h77eed37_0
- font-ttf-source-code-pro=2.038=h77eed37_0
- font-ttf-ubuntu=0.83=h77eed37_1
- fontconfig=2.14.2=h14ed4e7_0
- fonts-conda-ecosystem=1=0
- fonts-conda-forge=1=0
- fonttools=4.38.0=py37h540881e_0
- freetype=2.12.1=h267a509_2
- fribidi=1.0.10=h36c2ea0_0
- future=0.18.2=py37h89c1867_5
- gcc_impl_linux-64=13.2.0=h338b0a0_3
- gdk-pixbuf=2.42.10=h5eee18b_0
- gettext=0.21.1=h27087fc_0
- gffread=0.12.7=hdcf5f25_3
- gfortran_impl_linux-64=13.2.0=h76e1118_3
- giflib=5.2.1=h0b41bf4_3
- glib=2.78.1=hfc55251_0
- glib-tools=2.78.1=hfc55251_0
- glpk=5.0=h445213a_0
- gmp=6.3.0=h59595ed_0
- graphite2=1.3.13=h58526e2_1001
- graphviz=8.0.3=h2e5815a_0
- gsl=2.7=he838d99_0
- gtk2=2.24.33=h90689f9_2
- gts=0.7.6=h977cf35_4
- gxx_impl_linux-64=13.2.0=h338b0a0_3
- harfbuzz=6.0.0=h8e241bc_0
- hdf5=1.12.2=nompi_h2386368_101
- htseq=2.0.3=py37hfcd875c_0
- htslib=1.17=h6bc39ce_1
- humanfriendly=10.0=py37h89c1867_2
- icu=70.1=h27087fc_0
- idna=3.6=pyhd8ed1ab_0
- importlib-metadata=4.11.4=py37h89c1867_0
- isa-l=2.30.0=hd590300_6
- jinja2=3.1.2=pyhd8ed1ab_1
- jpeg=9e=h0b41bf4_3
- jq=1.5=0
- kallisto=0.50.1=hc877fd6_0
- kernel-headers_linux-64=2.6.32=he073ed8_16
- keyutils=1.6.1=h166bdaf_0
- kiwisolver=1.4.4=py37h7cecad7_0
- kmer-jellyfish=2.3.0=h9f5acd7_3
- krb5=1.20.1=hf9c8cef_0
- lcms2=2.14=h6ed2654_0
- ld_impl_linux-64=2.40=h41732ed_0
- ldc=1.28.1=hcf88599_0
- lerc=4.0.0=h27087fc_0
- libaec=1.1.2=h59595ed_1
- libarchive=3.5.2=hb890918_3
- libblas=3.9.0=20_linux64_openblas
- libbrotlicommon=1.1.0=hd590300_1
- libbrotlidec=1.1.0=hd590300_1
- libbrotlienc=1.1.0=hd590300_1
- libcblas=3.9.0=20_linux64_openblas
- libcups=2.3.3=h36d4200_3
- libcurl=7.87.0=h6312ad2_0
- libdeflate=1.13=h166bdaf_0
- libedit=3.1.20191231=he28a2e2_2
- libev=4.33=hd590300_2
- libexpat=2.5.0=hcb278e6_1
- libffi=3.4.2=h7f98852_5
- libgcc=7.2.0=h69d50b8_2
- libgcc-devel_linux-64=13.2.0=ha9c7c90_103
- libgcc-ng=13.2.0=h807b86a_3
- libgd=2.3.3=h18fbbfe_3
- libgfortran-ng=13.2.0=h69a702a_3
- libgfortran5=13.2.0=ha4646dd_3
- libglib=2.78.1=hebfc3b9_0
- libgomp=13.2.0=h807b86a_3
- libhwloc=2.9.1=hd6dc26d_0
- libiconv=1.17=hd590300_2
- libjemalloc=5.3.0=hcb278e6_0
- liblapack=3.9.0=20_linux64_openblas
- libllvm12=12.0.1=hf817b99_2
- libmamba=0.27.0=h0dd8ff0_0
- libmambapy=0.27.0=py37h1ee4b26_0
- libnghttp2=1.51.0=hdcd2b5c_0
- libnsl=2.0.1=hd590300_0
- libopenblas=0.3.25=pthreads_h413a1c8_0
- libpng=1.6.39=h753d276_0
- librsvg=2.54.4=h7abd40a_0
- libsanitizer=13.2.0=h7e041cc_3
- libsolv=0.7.27=hfc55251_0
- libsqlite=3.44.2=h2797004_0
- libssh2=1.10.0=haa6b8db_3
- libstdcxx-devel_linux-64=13.2.0=ha9c7c90_103
- libstdcxx-ng=13.2.0=h7e041cc_3
- libtiff=4.4.0=h0e0dad5_3
- libtool=2.4.7=h27087fc_0
- libuuid=2.38.1=h0b41bf4_0
- libwebp=1.3.2=h11a3e52_0
- libwebp-base=1.3.2=hd590300_0
- libxcb=1.13=h7f98852_1004
- libxml2=2.10.3=hca2bb57_4
- libxslt=1.1.37=h873f0b0_0
- libzlib=1.2.13=hd590300_5
- lxml=4.9.1=py37h540881e_0
- lz4-c=1.9.4=hcb278e6_0
- lzo=2.10=h516909a_1000
- lzstring=1.0.4=py_1001
- make=4.3=hd18ef5c_1
- mamba=0.27.0=py37h6dacc13_0
- markdown=3.5.1=pyhd8ed1ab_0
- markdown-it-py=2.2.0=pyhd8ed1ab_0
- markupsafe=2.1.1=py37h540881e_1
- matplotlib-base=3.5.3=py37hf395dca_2
- mdurl=0.1.0=pyhd8ed1ab_0
- multiqc=1.17=pyhdfd78af_0
- munkres=1.0.7=py_1
- ncurses=6.4=h59595ed_2
- networkx=2.7=pyhd8ed1ab_0
- nomkl=1.0=h5ca1d4c_0
- numexpr=2.8.3=py37h85a3170_100
- numpy=1.21.6=py37h976b520_0
- openjdk=17.0.3=h58dac75_5
- openjpeg=2.5.0=h7d73246_1
- openssl=1.1.1w=h7f8727e_0
- packaging=23.2=pyhd8ed1ab_0
- pandas=1.3.5=py37h8c16a72_0
- pandoc=3.1.3=h32600fe_0
- pango=1.50.14=hd33c08f_0
- pbzip2=1.1.13=h1fcc475_2
- pcre2=10.40=hc3806b6_0
- perl=5.32.1=4_hd590300_perl5
- pigz=2.8=h2797004_0
- pillow=9.2.0=py37h850a105_2
- pip=23.3.1=pyhd8ed1ab_0
- pixman=0.42.2=h59595ed_0
- pthread-stubs=0.4=h36c2ea0_1001
- pybind11-abi=4=hd8ed1ab_3
- pycosat=0.6.4=py37h540881e_0
- pycparser=2.21=pyhd8ed1ab_0
- pygments=2.17.2=pyhd8ed1ab_0
- pyopenssl=23.2.0=pyhd8ed1ab_1
- pyparsing=3.1.1=pyhd8ed1ab_0
- pysam=0.21.0=py37hee149a5_0
- pysocks=1.7.1=py37h89c1867_5
- python=3.7.12=hb7a2778_100_cpython
- python-dateutil=2.8.2=pyhd8ed1ab_0
- python-isal=1.1.0=py37h540881e_0
- python_abi=3.7=4_cp37m
- pytz=2023.3.post1=pyhd8ed1ab_0
- pyyaml=6.0=py37h540881e_4
- r-amap=0.8_19=r42hcf54a89_1
- r-ape=5.7_1=r42h08d816e_1
- r-argparse=2.2.2=r42hc72bb7e_1
- r-askpass=1.2.0=r42h57805ef_0
- r-assertthat=0.2.1=r42hc72bb7e_4
- r-backports=1.4.1=r42h57805ef_2
- r-base=4.2.2=h6b4767f_2
- r-base64enc=0.1_3=r42h57805ef_1006
- r-bh=1.81.0_1=r42hc72bb7e_1
- r-biasedurn=2.0.11=r42ha503ecb_0
- r-bit=4.0.5=r42h57805ef_1
- r-bit64=4.0.5=r42h57805ef_2
- r-bitops=1.0_7=r42h57805ef_2
- r-blob=1.2.4=r42hc72bb7e_1
- r-broom=1.0.5=r42hc72bb7e_1
- r-bslib=0.6.1=r42hc72bb7e_0
- r-cachem=1.0.8=r42h57805ef_1
- r-callr=3.7.3=r42hc72bb7e_1
- r-catools=1.18.2=r42ha503ecb_2
- r-cellranger=1.1.0=r42hc72bb7e_1006
- r-cli=3.6.2=r42ha503ecb_0
- r-clipr=0.8.0=r42hc72bb7e_2
- r-cluster=2.1.6=r42h61816a4_0
- r-codetools=0.2_19=r42hc72bb7e_1
- r-colorspace=2.1_0=r42h57805ef_1
- r-cpp11=0.4.7=r42hc72bb7e_0
- r-crayon=1.5.2=r42hc72bb7e_2
- r-curl=4.3.3=r42h06615bd_1
- r-data.table=1.14.10=r42h029312a_0
- r-dbi=1.1.3=r42hc72bb7e_2
- r-dbplyr=2.4.0=r42hc72bb7e_0
- r-digest=0.6.33=r42ha503ecb_0
- r-dplyr=1.1.4=r42ha503ecb_0
- r-dtplyr=1.3.1=r42hc72bb7e_1
- r-ellipsis=0.3.2=r42h57805ef_2
- r-evaluate=0.23=r42hc72bb7e_0
- r-fansi=1.0.6=r42h57805ef_0
- r-farver=2.1.1=r42ha503ecb_2
- r-fastcluster=1.2.3=r42h59595ed_2
- r-fastmap=1.1.1=r42ha503ecb_1
- r-fastmatch=1.1_4=r42h57805ef_0
- r-filelock=1.0.3=r42h57805ef_0
- r-findpython=1.0.8=r42hc72bb7e_1
- r-fontawesome=0.5.2=r42hc72bb7e_0
- r-forcats=1.0.0=r42hc72bb7e_1
- r-formatr=1.14=r42hc72bb7e_1
- r-fs=1.6.3=r42ha503ecb_0
- r-futile.logger=1.4.3=r42hc72bb7e_1005
- r-futile.options=1.0.1=r42hc72bb7e_1004
- r-gargle=1.5.2=r42h785f33e_0
- r-generics=0.1.3=r42hc72bb7e_2
- r-ggplot2=3.4.4=r42hc72bb7e_0
- r-glue=1.6.2=r42h57805ef_2
- r-googledrive=2.1.1=r42hc72bb7e_1
- r-googlesheets4=1.1.1=r42h785f33e_1
- r-gplots=3.1.3=r42hc72bb7e_2
- r-gtable=0.3.4=r42hc72bb7e_0
- r-gtools=3.9.5=r42h57805ef_0
- r-haven=2.5.4=r42ha503ecb_0
- r-highr=0.10=r42hc72bb7e_1
- r-hms=1.1.3=r42hc72bb7e_1
- r-htmltools=0.5.7=r42ha503ecb_0
- r-httr=1.4.7=r42hc72bb7e_0
- r-hwriter=1.3.2.1=r42hc72bb7e_2
- r-ids=1.0.1=r42hc72bb7e_3
- r-igraph=1.4.2=r42h65ed38e_0
- r-isoband=0.2.7=r42ha503ecb_2
- r-jquerylib=0.1.4=r42hc72bb7e_2
- r-jsonlite=1.8.8=r42h57805ef_0
- r-kernsmooth=2.23_22=r42h13b3f57_0
- r-knitr=1.45=r42hc72bb7e_0
- r-labeling=0.4.3=r42hc72bb7e_0
- r-lambda.r=1.2.4=r42hc72bb7e_3
- r-lattice=0.22_5=r42h57805ef_0
- r-lifecycle=1.0.4=r42hc72bb7e_0
- r-locfit=1.5_9.8=r42h57805ef_1
- r-lubridate=1.9.3=r42h57805ef_0
- r-magrittr=2.0.3=r42h57805ef_2
- r-mass=7.3_60=r42h57805ef_1
- r-matrix=1.6_4=r42h316c678_0
- r-matrixstats=1.2.0=r42h57805ef_0
- r-memoise=2.0.1=r42hc72bb7e_2
- r-mgcv=1.9_0=r42h316c678_0
- r-mime=0.12=r42h57805ef_2
- r-modelr=0.1.11=r42hc72bb7e_1
- r-munsell=0.5.0=r42hc72bb7e_1006
- r-nlme=3.1_164=r42h61816a4_0
- r-openssl=2.0.5=r42hb1dc35e_0
- r-phangorn=2.11.1=r42h7ce84a7_1
- r-pillar=1.9.0=r42hc72bb7e_1
- r-pkgconfig=2.0.3=r42hc72bb7e_3
- r-plogr=0.2.0=r42hc72bb7e_1005
- r-plyr=1.8.9=r42ha503ecb_0
- r-png=0.1_8=r42h81d01c5_1
- r-prettyunits=1.2.0=r42hc72bb7e_0
- r-processx=3.8.3=r42h57805ef_0
- r-progress=1.2.3=r42hc72bb7e_0
- r-ps=1.7.5=r42h57805ef_1
- r-purrr=1.0.2=r42h57805ef_0
- r-quadprog=1.5_8=r42h13b3f57_5
- r-r6=2.5.1=r42hc72bb7e_2
- r-rappdirs=0.3.3=r42h57805ef_2
- r-rcolorbrewer=1.1_3=r42h785f33e_2
- r-rcpp=1.0.11=r42h7df8631_0
- r-rcpparmadillo=0.12.6.4.0=r42h08d816e_0
- r-rcurl=1.98_1.10=r42h133d619_0
- r-readr=2.1.4=r42ha503ecb_1
- r-readxl=1.4.3=r42ha5c9fba_0
- r-rematch=2.0.0=r42hc72bb7e_0
- r-rematch2=2.1.2=r42hc72bb7e_3
- r-reprex=2.0.2=r42hc72bb7e_2
- r-reshape2=1.4.4=r42ha503ecb_3
- r-restfulr=0.0.15=r42h56115f1_2
- r-rjson=0.2.21=r42ha503ecb_3
- r-rlang=1.1.2=r42ha503ecb_0
- r-rmarkdown=2.25=r42hc72bb7e_0
- r-rsqlite=2.3.4=r42ha503ecb_0
- r-rstudioapi=0.15.0=r42hc72bb7e_0
- r-rvest=1.0.3=r42hc72bb7e_2
- r-sass=0.4.8=r42ha503ecb_0
- r-scales=1.3.0=r42hc72bb7e_0
- r-selectr=0.4_2=r42hc72bb7e_3
- r-sm=2.2_5.7.1=r42h61816a4_2
- r-snow=0.4_4=r42hc72bb7e_2
- r-statmod=1.5.0=r42hd8f1df9_1
- r-stringi=1.7.12=r42h1ae9187_0
- r-stringr=1.5.1=r42h785f33e_0
- r-survival=3.5_7=r42h57805ef_0
- r-sys=3.4.2=r42h57805ef_1
- r-tibble=3.2.1=r42h57805ef_2
- r-tidyr=1.3.0=r42ha503ecb_1
- r-tidyselect=1.2.0=r42hc72bb7e_1
- r-tidyverse=1.3.2=r42hc72bb7e_1
- r-timechange=0.2.0=r42ha503ecb_1
- r-tinytex=0.49=r42hc72bb7e_0
- r-tzdb=0.4.0=r42ha503ecb_1
- r-utf8=1.2.4=r42h57805ef_0
- r-uuid=1.1_1=r42h57805ef_0
- r-vctrs=0.6.5=r42ha503ecb_0
- r-vioplot=0.4.0=r42hc72bb7e_1
- r-viridislite=0.4.2=r42hc72bb7e_1
- r-vroom=1.6.5=r42ha503ecb_0
- r-withr=2.5.2=r42hc72bb7e_0
- r-xfun=0.41=r42ha503ecb_0
- r-xml=3.99_0.14=r42hb43fdd4_0
- r-xml2=1.3.3=r42h044e5c7_2
- r-xtable=1.8_4=r42hc72bb7e_5
- r-yaml=2.3.8=r42h57805ef_0
- r-zoo=1.8_12=r42h57805ef_1
- readline=8.2=h8228510_1
- reproc=14.2.4.post0=hd590300_1
- reproc-cpp=14.2.4.post0=h59595ed_1
- requests=2.31.0=pyhd8ed1ab_0
- rich=13.7.0=pyhd8ed1ab_0
- rich-click=1.7.2=pyhd8ed1ab_0
- ruamel_yaml=0.15.80=py37h540881e_1007
- salmon=1.10.2=hecfa306_0
- sambamba=1.0=h98b6b92_0
- samtools=1.18=hd87286a_0
- scipy=1.7.3=py37hf2a6cf1_0
- sed=4.8=he412f7d_0
- setuptools=68.2.2=pyhd8ed1ab_0
- simplejson=3.17.6=py37h540881e_1
- six=1.16.0=pyh6c4a22f_0
- spectra=0.0.11=py_1
- sqlite=3.44.2=h2c6b66d_0
- star=2.7.11a=h0033a41_0
- subread=2.0.1=h7132678_2
- sysroot_linux-64=2.12=he073ed8_16
- tbb=2021.9.0=hf52228f_0
- tk=8.6.13=noxft_h4845f30_101
- tktable=2.10=h0c5db8f_5
- toml=0.10.2=pyhd8ed1ab_0
- tomlkit=0.12.3=pyha770c72_0
- toolz=0.12.0=pyhd8ed1ab_0
- tpmcalculator=0.0.4=hf393df8_3
- trim-galore=0.6.7=hdfd78af_0
- trimmomatic=0.39=hdfd78af_2
- trinity=2.13.2=hff880f7_4
- typing-extensions=4.7.1=hd8ed1ab_0
- typing_extensions=4.7.1=pyha770c72_0
- unicodedata2=14.0.0=py37h540881e_1
- urllib3=2.1.0=pyhd8ed1ab_0
- wheel=0.42.0=pyhd8ed1ab_0
- xmltodict=0.13.0=pyhd8ed1ab_0
- xopen=1.6.0=py37h89c1867_0
- xorg-fixesproto=5.0=h7f98852_1002
- xorg-inputproto=2.3.2=h7f98852_1002
- xorg-kbproto=1.0.7=h7f98852_1002
- xorg-libice=1.0.10=h7f98852_0
- xorg-libsm=1.2.3=hd9c2040_1000
- xorg-libx11=1.8.4=h0b41bf4_0
- xorg-libxau=1.0.11=hd590300_0
- xorg-libxdmcp=1.1.3=h7f98852_0
- xorg-libxext=1.3.4=h0b41bf4_2
- xorg-libxfixes=5.0.3=h7f98852_1004
- xorg-libxi=1.7.10=h7f98852_0
- xorg-libxrender=0.9.10=h7f98852_1003
- xorg-libxt=1.3.0=hd590300_0
- xorg-libxtst=1.2.3=h7f98852_1002
- xorg-recordproto=1.14.2=h7f98852_1002
- xorg-renderproto=0.11.1=h7f98852_1002
- xorg-xextproto=7.3.0=h0b41bf4_1003
- xorg-xproto=7.0.31=h7f98852_1007
- xz=5.2.6=h166bdaf_0
- yaml=0.2.5=h7f98852_2
- yaml-cpp=0.7.0=h59595ed_3
- yq=3.2.3=pyhd8ed1ab_0
- zipp=3.15.0=pyhd8ed1ab_0
- zlib=1.2.13=hd590300_5
- zstandard=0.18.0=py37h540881e_0
- zstd=1.5.5=hfc55251_0
prefix: /data/gpfs/home/joelshin/scratch/bin/miniconda3/bin/conda/envs/BCH709_final
```
### Question 2
To conduct RNA-sequencing (RNA-seq) analysis on WT versus R6-2 treatment samples, gathered from 6-week-old mice and sequenced using Illumina paired-end technology, the following procedures are outlined. The dataset includes samples from WT and R6-2, each with four replicates.
1) Adaptor and Quality Trimming using Trim-Galore: Process all FastQ files with Trim-Galore to remove adaptors and perform quality trimming
* CREATE AND UPLOAD SBATCH COMMAND FILE
2) Indexing the Reference Genome with STAR: Create an index of the mouse genome (mm39) using the STAR aligner
* CREATE AND UPLOAD SBATCH COMMAND FILE
3) Aligning Trimmed Reads to Reference Genome with STAR Aligner: Align the trimmed reads from all samples to the mm39 reference genome using the STAR aligner
* CREATE AND UPLOAD SBATCH COMMAND FILE
4) Read Counting per Gene using featureCounts: Count reads per gene for each aligned file using featureCounts
* CREATE AND UPLOAD SBATCH COMMAND FILE
5) Generate a consolidated report using MultiQC and upload that file
```
<Activate final environment>
conda activate BCH709_final
<Create final folder>
mkdir /data/gpfs/assoc/bch709-4/joelshin/final/
<Create sub-folders under final: fastq, ref, trim, bam, readcount, DEG>
mkdir /data/gpfs/assoc/bch709-4/joelshin/final/fastq
mkdir /data/gpfs/assoc/bch709-4/joelshin/final/ref
mkdir /data/gpfs/assoc/bch709-4/joelshin/final/trim
mkdir /data/gpfs/assoc/bch709-4/joelshin/final/bam
mkdir /data/gpfs/assoc/bch709-4/joelshin/final/readcount
mkdir /data/gpfs/assoc/bch709-4/joelshin/final/DEG
<Change into reference working directory>
cd /data/gpfs/assoc/bch709-4/joelshin/final/ref
<Copy reference file from Course materials into reference working directory>
cp /data/gpfs/assoc/bch709-4/Course_materials/final/ref/mm39.fa .
cp /data/gpfs/assoc/bch709-4/Course_materials/final/ref/refGene.gtf .
<Create a batch file: ref_build.sh>
cp /data/gpfs/assoc/bch709-4/Course_materials/mouse/run.sh ./ref_build.sh
<Edit batch file>
nano ref_build.sh
#!/bin/bash
#SBATCH --job-name=STAR_aligner_reference_joelshin
#SBATCH --cpus-per-task=4
#SBATCH --mem=64g
#SBATCH --time=2-15:00:00
#SBATCH --mail-type=all
#SBATCH --mail-user=joelshin@nevada.unr.edu
#SBATCH -o log.%x.%j.out # STDOUT & STDERR
#SBATCH -p cpu-core-0
#SBATCH -A cpu-s5-bch709-4
####SBATCH --dependency=afterok:<PREVIOUS_JOBID>
# Build the STAR genome index for mm39 in the current directory, annotated with refGene.gtf.
# --cpus-per-task above is set to 4 so it matches --runThreadN 4; requesting fewer CPUs than
# STAR threads oversubscribes the allocation.
STAR --runThreadN 4 --runMode genomeGenerate --genomeDir . --genomeFastaFiles mm39.fa --sjdbGTFfile refGene.gtf --sjdbOverhang 99 --genomeSAindexNbases 12
<Submit batch file>
sbatch ref_build.sh
<Check job status>
squeue -u joelshin
<Change directory to fastq directory after completing reference alignment>
cd ../
cd fastq
<Link fastq file from course material to fastq directory>
ln -s /data/gpfs/assoc/bch709-4/Course_materials/final/fastq/* .
<Create filelist>
ls -1 *.gz | sed 's/_R.\.fastq\.gz//g' | sort -u > /data/gpfs/assoc/bch709-4/joelshin/final/filelist
<Prepare trim batch file>
cp /data/gpfs/assoc/bch709-4/Course_materials/mouse/run.sh ./trim.sh
sed -i "s/\-\-cpus\-per\-task\=2/\-\-cpus\-per\-task\=4/g; s/\[NAME\]/Trim/g; s/\[youremail\]/${USER}\@unr.edu\,${USER}\@nevada.unr.edu/g" /data/gpfs/assoc/bch709-4/joelshin/final/fastq/trim.sh
<Edit trim batch file>
nano trim.sh
#!/bin/bash
#SBATCH --job-name=Trim_fastq_joelshin
#SBATCH --cpus-per-task=4
#SBATCH --mem=32g
#SBATCH --time=2-15:00:00
#SBATCH --mail-type=all
#SBATCH --mail-user=joelshin@nevada.unr.edu
#SBATCH -o log.%x.%j.out # STDOUT & STDERR
#SBATCH -p cpu-core-0
#SBATCH -A cpu-s5-bch709-4
####SBATCH --dependency=afterok:<PREVIOUS_JOBID>
<Create individual batch files for fastq files>
for i in `cat ../filelist`
do
read1=${i}_R1.fastq.gz
read2=${read1//_R1.fastq.gz/_R2.fastq.gz}
echo $read1 $read2
echo "trim_galore --paired --three_prime_clip_R1 5 --three_prime_clip_R2 5 --cores 2 --max_n 40 --fastqc --gzip -o /data/gpfs/assoc/bch709-4/joelshin/final/trim $read1 $read2" | cat trim.sh - > ${i}_trim.sh
done
<Submit trim batch job>
# Submit only the generated per-sample scripts. A bare *.sh would also submit
# trim.sh itself, which is just the SBATCH header template and contains no command.
for i in *_trim.sh
do
sbatch "$i"
done
<Change directory to trim folder>
cd ../trim
<Copy a RNA-seq alignment batch file into your trim working directory>
cp /data/gpfs/assoc/bch709-4/Course_materials/mouse/run.sh ./mapping.sh
<Change parameters on your batch file>
sed -i "s/16g/64g/g; s/\-\-cpus\-per\-task\=2/\-\-cpus\-per\-task\=4/g; s/\[NAME\]/Trim/g; s/\[youremail\]/${USER}\@unr.edu\,${USER}\@nevada.unr.edu/g" ./mapping.sh
nano mapping.sh
#!/bin/bash
#SBATCH --job-name=Trim_RNA-Seq_joelshin
#SBATCH --cpus-per-task=4
#SBATCH --mem=64g
#SBATCH --time=2-15:00:00
#SBATCH --mail-type=all
#SBATCH --mail-user=joelshin@nevada.unr.edu
#SBATCH -o log.%x.%j.out # STDOUT & STDERR
#SBATCH -p cpu-core-0
#SBATCH -A cpu-s5-bch709-4
####SBATCH --dependency=afterok:<PREVIOUS_JOBID>
<Create individual batch files with RNA-seq batch files>
for i in `cat ../filelist`
do
read1=${i}_R1_val_1.fq.gz
read2=${read1//_R1_val_1.fq.gz/_R2_val_2.fq.gz}
echo $read1 $read2
echo "STAR --runMode alignReads --runThreadN 4 --outFilterMultimapNmax 100 --alignIntronMin 25 --alignIntronMax 50000 --genomeDir /data/gpfs/assoc/bch709-4/joelshin/final/ref --readFilesCommand gunzip -c --readFilesIn /data/gpfs/assoc/bch709-4/joelshin/final/trim/${read1} /data/gpfs/assoc/bch709-4/joelshin/final/trim/${read2} --outSAMtype BAM SortedByCoordinate --outFileNamePrefix /data/gpfs/assoc/bch709-4/joelshin/final/bam/${i}.bam" | cat mapping.sh - > ${i}_mapping.sh
done
<Submit RNA-seq alignmnet batch job>
# Submit every generated per-sample mapping script.
# (The "> " continuation prompts from the terminal were removed: pasted back into a shell
# they would be parsed as output redirections instead of the loop body.)
for i in *_mapping.sh
do
sbatch "$i"
done
<Check job status>
squeue -u joelshin
<Change to bam directory>
cd ../bam
<Copy feature count batch file from course material into bam directory>
cp /data/gpfs/assoc/bch709-4/Course_materials/mouse/run.sh ./count.sh
<Change configuration in count batch file>
sed -i "s/16g/64g/g; s/\-\-cpus\-per\-task\=2/\-\-cpus\-per\-task\=4/g; s/\[NAME\]/Count/g; s/\[youremail\]/${USER}\@unr.edu\,${USER}\@nevada.unr.edu/g" /data/gpfs/assoc/bch709-4/joelshin/final/bam/count.sh
<Further change>
nano count.sh
#!/bin/bash
#SBATCH --job-name=FeatureCount_final_joelshin
#SBATCH --cpus-per-task=4
#SBATCH --mem=64g
#SBATCH --time=2-15:00:00
#SBATCH --mail-type=all
#SBATCH --mail-user=joelshin@nevada.unr.edu
#SBATCH -o log.%x.%j.out # STDOUT & STDERR
#SBATCH -p cpu-core-0
#SBATCH -A cpu-s5-bch709-4
####SBATCH --dependency=afterok:<PREVIOUS_JOBID>
featureCounts -o /data/gpfs/assoc/bch709-4/joelshin/final/readcount/readcount -T 4 -Q 1 -p -M -g gene_id -a /data/gpfs/assoc/bch709-4/joelshin/final/ref/refGene.gtf $(for i in `cat /data/gpfs/assoc/bch709-4/joelshin/final/filelist`; do echo ${i}.bamAligned.sortedByCoord.out.bam| tr '\n' ' ';done)
<Submit job>
sbatch count.sh
<Check status>
squeue -u joelshin
<Change to readcount directory>
cd ../readcount
<Call multiqc command>
multiqc --dirs /data/gpfs/assoc/bch709-4/joelshin/final/readcount --filename MultiQC_Reads
<Download Multiqc Summary file onto local computer>
cd Downloads
scp -r joelshin@pronghorn.rc.unr.edu:/data/gpfs/assoc/bch709-4/joelshin/final/readcount/MultiQC_Reads_data .
```
### Question 3
To assess the correlation between biological replicates and explore relationships among your samples, using PtR (part of the Trinity software suite) is indeed an appropriate choice. This tool will help you visualize these relationships effectively. To proceed, the PtR analysis will be conducted with these specified parameters. Upon completion, all generated PDF files need to be uploaded. These files will include visualizations like correlation matrices and other relevant plots, providing insights into the sample correlations and replicate comparisons.
File: /data/gpfs/assoc/bch709-4/Course_materials/final/mouse_sample_association
PtR Options:
CPM (Counts Per Million) normalization
Log2 transformation
Minimum row sum threshold of 10
Generation of a sample correlation matrix
Comparison of replicates
```
<Activate BCH709_final environment>
conda activate BCH709_final
<Create directory called final_mouse and sub directorys>
# -p creates the missing parent final_mouse/ directory as well and does not fail if a folder already exists.
mkdir -p /data/gpfs/assoc/bch709-4/joelshin/final_mouse/fastq
mkdir -p /data/gpfs/assoc/bch709-4/joelshin/final_mouse/ref
mkdir -p /data/gpfs/assoc/bch709-4/joelshin/final_mouse/trim
mkdir -p /data/gpfs/assoc/bch709-4/joelshin/final_mouse/bam
mkdir -p /data/gpfs/assoc/bch709-4/joelshin/final_mouse/readcount
mkdir -p /data/gpfs/assoc/bch709-4/joelshin/final_mouse/DEG
<Change to reference directory and copy reference file from course materials into reference directory>
cd /data/gpfs/assoc/bch709-4/joelshin/final_mouse/ref
cp /data/gpfs/assoc/bch709-4/Course_materials/final/ref/refGene.gtf .
cp /data/gpfs/assoc/bch709-4/Course_materials/final/ref/mm39.fa .
cp /data/gpfs/assoc/bch709-4/Course_materials/mouse/run.sh ./ref_build.sh
<Edit Batch file>
nano ref_build.sh
#!/bin/bash
#SBATCH --job-name=STAR_Aligner_final_mouse
#SBATCH --cpus-per-task=4
#SBATCH --mem=64g
#SBATCH --time=2-15:00:00
#SBATCH --mail-type=all
#SBATCH --mail-user=joelshin@nevada.unr.edu
#SBATCH -o log.%x.%j.out # STDOUT & STDERR
#SBATCH -p cpu-core-0
#SBATCH -A cpu-s5-bch709-4
####SBATCH --dependency=afterok:<PREVIOUS_JOBID>
# Build the STAR genome index for mm39 in the current directory, annotated with refGene.gtf.
# --cpus-per-task above is set to 4 so it matches --runThreadN 4; requesting fewer CPUs than
# STAR threads oversubscribes the allocation.
STAR --runThreadN 4 --runMode genomeGenerate --genomeDir . --genomeFastaFiles mm39.fa --sjdbGTFfile refGene.gtf --sjdbOverhang 99 --genomeSAindexNbases 12
<Submit job>
sbatch ref_build.sh
<Check job status>
squeue -u joelshin
<Change into fastq directory>
cd ../fastq
<Link all fastq files from course material into fastq directory and order>
# The course FASTQ files live in the final/fastq/ sub-folder (same location used for Question 2).
ln -s /data/gpfs/assoc/bch709-4/Course_materials/final/fastq/*.gz .
# Strip the _R1/_R2 suffix and de-duplicate to get one sample prefix per line.
ls -1 *.gz | sed 's/_R.\.fastq\.gz//g' | sort -u > ../mouse_filelist
<Copy trim file from couse material into fastq directory>
cp /data/gpfs/assoc/bch709-4/Course_materials/mouse/run.sh ./trim.sh
sed -i "s/\-\-cpus\-per\-task\=2/\-\-cpus\-per\-task\=4/g; s/\[NAME\]/Trim/g; s/\[youremail\]/${USER}\@unr.edu\,${USER}\@nevada.unr.edu/g" ./trim.sh
<Edit Batch file>
nano trim.sh
#!/bin/bash
#SBATCH --job-name=FASTQ_Trim_final_mouse
#SBATCH --cpus-per-task=4
#SBATCH --mem=16g
#SBATCH --time=2-15:00:00
#SBATCH --mail-type=all
#SBATCH --mail-user=joelshin@unr.edu,joelshin@nevada.unr.edu
#SBATCH -o log.%x.%j.out # STDOUT & STDERR
#SBATCH -p cpu-core-0
#SBATCH -A cpu-s5-bch709-4
####SBATCH --dependency=afterok:<PREVIOUS_JOBID>
<Create individual batch files for fastq jobs>
for i in `cat ../mouse_filelist`
do
read1=${i}_R1.fastq.gz
read2=${i}_R2.fastq.gz
echo $read1 $read2
echo "trim_galore --paired --three_prime_clip_R1 5 --three_prime_clip_R2 5 --cores 2 --max_n 40 --fastqc --gzip -o /data/gpfs/assoc/bch709-4/joelshin/final_mouse/trim $read1 $read2" | cat trim.sh - > ${i}_trim.sh
done
<Submit job>
# Submit only the generated per-sample scripts. A bare *.sh would also submit
# trim.sh itself, which is just the SBATCH header template and contains no command.
for i in *_trim.sh
do
sbatch "$i"
done
<Check status>
squeue -u joelshin
### Before going to next step, make sure the job is finished first! ###
<Change directory to trim folder and copy batch file from course material>
cd /data/gpfs/assoc/bch709-4/joelshin/final_mouse/trim
cp /data/gpfs/assoc/bch709-4/Course_materials/mouse/run.sh ./mapping.sh
sed -i "s/16g/64g/g; s/\-\-cpus\-per\-task\=2/\-\-cpus\-per\-task\=4/g; s/\[NAME\]/Trim/g; s/\[youremail\]/${USER}\@unr.edu\,${USER}\@nevada.unr.edu/g" ./mapping.sh
<Create individual batch files>
for i in `cat ../mouse_filelist`
do
read1=${i}_R1_val_1.fq.gz
read2=${i}_R2_val_2.fq.gz
echo $read1 $read2
echo "STAR --runMode alignReads --runThreadN 4 --outFilterMultimapNmax 100 --alignIntronMin 25 --alignIntronMax 50000 --genomeDir /data/gpfs/assoc/bch709-4/joelshin/final_mouse/ref --readFilesCommand gunzip -c --readFilesIn /data/gpfs/assoc/bch709-4/joelshin/final_mouse/trim/${read1} /data/gpfs/assoc/bch709-4/joelshin/final_mouse/trim/${read2} --outSAMtype BAM SortedByCoordinate --outFileNamePrefix /data/gpfs/assoc/bch709-4/joelshin/final_mouse/bam/${i}.bam" | cat mapping.sh - > ${i}_mapping.sh
done
<Submit job>
for i in `ls -1 *_mapping.sh`
do
sbatch $i
done
### Before going to next step, make sure the job is finished first! ###
<Change directory to bam folder and copy batch file from course material>
cd /data/gpfs/assoc/bch709-4/joelshin/final_mouse/bam
cp /data/gpfs/assoc/bch709-4/Course_materials/mouse/run.sh ./count.sh
sed -i "s/16g/64g/g; s/\-\-cpus\-per\-task\=2/\-\-cpus\-per\-task\=4/g; s/\[NAME\]/Count/g; s/\[youremail\]/${USER}\@unr.edu\,${USER}\@nevada.unr.edu/g" ./count.sh
<Add command to batch file>
nano count.sh
#!/bin/bash
#SBATCH --job-name=Count_final_mouse
#SBATCH --cpus-per-task=4
#SBATCH --mem=64g
#SBATCH --time=2-15:00:00
#SBATCH --mail-type=all
#SBATCH --mail-user=joelshin@nevada.unr.edu
#SBATCH -o log.%x.%j.out # STDOUT & STDERR
#SBATCH -p cpu-core-0
#SBATCH -A cpu-s5-bch709-4
####SBATCH --dependency=afterok:<PREVIOUS_JOBID>
# Count reads per gene across all aligned samples into readcount/featurecount.
# The BAM names are built from mouse_filelist (one sample prefix per line, written in the fastq step);
# STAR was run with --outFileNamePrefix <sample>.bam, hence the ".bamAligned.sortedByCoord.out.bam" suffix.
# Submit this script from the bam directory: the BAM paths are relative.
featureCounts -o /data/gpfs/assoc/bch709-4/joelshin/final_mouse/readcount/featurecount -T 4 -Q 1 -p -M -g gene_id -a /data/gpfs/assoc/bch709-4/joelshin/final_mouse/ref/refGene.gtf $(for i in `cat /data/gpfs/assoc/bch709-4/joelshin/final_mouse/mouse_filelist`; do echo ${i}.bamAligned.sortedByCoord.out.bam| tr '\n' ' ';done)
<Submit Job>
sbatch count.sh
<Check Status>
squeue -u joelshin
### Before going to next step, make sure the job is finished first! ###
<Change directory to readcount folder>
cd ../readcount
<Create Only.cnt and length.cnt files>
# Keep the gene ID plus the per-sample count columns (7 onward), drop the "#" comment line,
# and shorten the column headers to the bare sample names.
cut -f1,7- featurecount | grep -v "#" | sed 's/Aligned\.sortedByCoord\.out\.bam//g; s/\.bam//g' > final_mouse_only.cnt
# Same table, but also keeping column 6 (gene length) for the TPM calculation.
cut -f1,6- featurecount | grep -v "#" | sed 's/Aligned\.sortedByCoord\.out\.bam//g; s/\.bam//g' > final_mouse_length.cnt
<Copy files from course materials to readcount directory>
cp /data/gpfs/assoc/bch709-4/Course_materials/final/mouse_sample_association .
cp /data/gpfs/assoc/bch709-4/Course_materials/final/mouse_contrast .
<PtR analysis>
PtR --matrix final_mouse_only.cnt --samples mouse_sample_association --CPM --log2 --min_rowSums 10 --sample_cor_matrix --compare_replicates
<Go to local computer and download all pdf files from readcount folder>
(base) PS C:\Users\sakor> cd .\Downloads\
(base) PS C:\Users\sakor\Downloads> scp joelshin@pronghorn.rc.unr.edu:/data/gpfs/assoc/bch709-4/joelshin/final_mouse/readcount/*.pdf .
```
### Question 4
To identify differentially expressed genes (DEGs) in WT versus R6-2 treatment samples using DESeq2, the DEG analysis process will be conducted as follows:
1. **DE Analysis**:
- Utilize the script `run_DE_analysis.pl`.
- Apply the `--method DESeq2` option for employing the DESeq2 method.
2. **Subset Criteria**:
- Apply a significance threshold with `--P 0.05` (P-value cutoff) and `--C 1` (log2 fold change cutoff).
3. **Mouse Contrast File**:
- The contrast file to be used is located at `/data/gpfs/assoc/bch709-4/Course_materials/final/mouse_contrast`.
Task: Upload all pdf files
```
<Activate BCH709_final environment>
conda activate BCH709_final
<Change directory to DEG folder directory>
cd /data/gpfs/assoc/bch709-4/joelshin/final_mouse/DEG
<Copy the count matrices, sample association and contrast files from the readcount folder into the DEG folder>
cp ../readcount/final_mouse_only.cnt ../readcount/final_mouse_length.cnt ../readcount/mouse_sample_association ../readcount/mouse_contrast .
<Run DEG analysis and create rnaseq folder>
run_DE_analysis.pl --matrix final_mouse_only.cnt --method DESeq2 --samples_file mouse_sample_association --contrasts mouse_contrast --output final_mouse_rnaseq
<Call TPM calculator>
python /data/gpfs/assoc/bch709-4/Course_materials/script/tpm_raw_exp_calculator.py -count final_mouse_length.cnt
<Change into the rnaseq folder and call differential analysis with P value = 0.05 and C = 1>
cd final_mouse_rnaseq
analyze_diff_expr.pl --samples /data/gpfs/assoc/bch709-4/joelshin/final_mouse/DEG/mouse_sample_association --matrix /data/gpfs/assoc/bch709-4/joelshin/final_mouse/DEG/final_mouse_length.cnt.tpm.tab -P 0.05 -C 1 --output final_mouse_RNASEQ_P005_C1
<Go to local computer and download all pdf files from the rnaseq folder>
(base) PS C:\Users\sakor> cd .\Downloads\
(base) PS C:\Users\sakor\Downloads> scp joelshin@pronghorn.rc.unr.edu:/data/gpfs/assoc/bch709-4/joelshin/final_mouse/DEG/final_mouse_rnaseq/*.pdf .
```
### Question 5
Please navigate to Metascape https://metascape.org/gp/index.html and individually input the lists of filtered upregulated genes for both WT-R6-2 and R6-2 from Question 4. In the selection process, choose 'Mouse' as the target organism. Proceed to conduct an 'Express Analysis' for each gene set. Once the analysis is complete, download the full report, which will include a range of figures and data visualizations for both WT-R6-2 and R6-2.
```
<Change directory to rnaseq folder>
cd /data/gpfs/assoc/bch709-4/joelshin/final_mouse/DEG/final_mouse_rnaseq
<Create .txt files for metascape: first column only, header line dropped>
cut -f 1 final_mouse_only.cnt.R6-2_vs_WT-R6-2.DESeq2.DE_results.P0.05_C1.DE.subset | grep -v sampleA > Metascape_final_mouse.txt
<Create one list per upregulated gene set, as the question asks for WT-R6-2 and R6-2 separately (the -UP.subset file names are assumed from Trinity's analyze_diff_expr.pl naming; confirm with ls)>
cut -f 1 final_mouse_only.cnt.R6-2_vs_WT-R6-2.DESeq2.DE_results.P0.05_C1.R6-2-UP.subset | grep -v sampleA > Metascape_final_mouse_R6-2_UP.txt
cut -f 1 final_mouse_only.cnt.R6-2_vs_WT-R6-2.DESeq2.DE_results.P0.05_C1.WT-R6-2-UP.subset | grep -v sampleA > Metascape_final_mouse_WT-R6-2_UP.txt
<Go to local computer and download txt files from the rnaseq folder>
(base) PS C:\Users\sakor> cd .\Downloads\
(base) PS C:\Users\sakor\Downloads> scp joelshin@pronghorn.rc.unr.edu:/data/gpfs/assoc/bch709-4/joelshin/final_mouse/DEG/final_mouse_rnaseq/*.txt .
```