# RNA Seq with Julia Class 10/6/20 Goal: Build counts table and analyze expression data in Julia https://biojulia.net/GenomicFeatures.jl/v0.2/io/gff3/ log on to CRC Make new working directory for today and cd into it copy files for today ``` cp /tmp/ND_ICG_OCT_6/* . module load julia using BioAlignments using GenomicFeatures using DataStructures using XAM using GFF3 #reminder that you can add packages if you don't have them by # ] # add <package> # ctrl+c #uses XAM package bam_reader = open(BAM.Reader, "sample.bam") #uses GFF3 package gff3_reader = open(GFF3.Reader, "sample.gff3") for record in reader seqid = GFF3.seqid(reader) end for record in bam_reader if BAM.ismapped(record) println(BAM.refname(record),':',BAM.position(record)) end for record in gff3_reader println(record) #trying to go back and get this complete command - will update filter!(x -> GFF3.featuretype(x) == "mRNA", gff3_reader) #produces error - filter function has been removed/deprecated for feature in gff3_reader for record in eachoverlap(bam_reader,feature) println(record) end end #Need index for BAM - use samtools to create (you don't need to do this, it will be in /tmp/ND_IGC_2020-OCT_06) #module load bio/2.0 #samtools sort -o sample.sorted.bam sample.sorted.index #samtools index sample.sorted.bam #copy index to current working directory cp /tmp/ND_ICG_2020_OCT_06/*.sorted* . for feature in gff3_reader if(GFF3.featuretype(feature) =="mRNA") println(feature) end target = GFF3.initialize! ``` --- ## October 8, 2020 Review/addressing errors from Tuesday Log on to CRC module load julia julia ``` using BioAlignments using GenomicFeatures using DataStructures using XAM using GFF3 bam_reader = open(BAM.Reader, "sample.bam") gff3_reader = open(GFF3.Reader, "sample.gff3") for record in gff3_reader println(record) end #get mRNAs from gff3 for record in gff3_reader if(GFF3.featuretype(record) == "mRNA") println(record) end end #count mRNAs (similar to Salmon) bam_reader = open(BAM.Reader, "sample.sorted.bam", index = "sample.sorted.index") for feature in gff3_reader for record in eachoverlap(bam_reader, feature) println(record) end end for feature in gff3_reader if(GFF3.featuretype(feature) == "mRNA") println(feature) i=0 for record in eachoverlap(bam_reader, feature) i=i+1 end println(i) end end #iterating through reader isn't working, trying to turn data into a dataframe to iterate instead #add package DataFrames using DataFrames df = DataFrame(feature = GFF3.Reader, count = Int[]) for feature in gff3_reader if(GFF3.featuretype(feature) =="mRNA") i=0 for record in eachoverlap(bam_reader, feature) i = i+1 end push!(df, (feature,i)) end end