# RNA Seq with Julia
Class 10/6/20
Goal: Build counts table and analyze expression data in Julia
https://biojulia.net/GenomicFeatures.jl/v0.2/io/gff3/
log on to CRC
Make new working directory for today and cd into it
copy files for today
```
cp /tmp/ND_ICG_OCT_6/* .
module load julia
using BioAlignments
using GenomicFeatures
using DataStructures
using XAM
using GFF3
#reminder that you can add packages if you don't have them by
# ]
# add <package>
# ctrl+c
#uses XAM package
bam_reader = open(BAM.Reader, "sample.bam")
#uses GFF3 package
gff3_reader = open(GFF3.Reader, "sample.gff3")
for record in reader
seqid = GFF3.seqid(reader)
end
for record in bam_reader
if BAM.ismapped(record)
println(BAM.refname(record),':',BAM.position(record))
end
for record in gff3_reader
println(record)
#trying to go back and get this complete command - will update
filter!(x -> GFF3.featuretype(x) == "mRNA", gff3_reader)
#produces error - filter function has been removed/deprecated
for feature in gff3_reader
for record in eachoverlap(bam_reader,feature)
println(record)
end
end
#Need index for BAM - use samtools to create (you don't need to do this, it will be in /tmp/ND_IGC_2020-OCT_06)
#module load bio/2.0
#samtools sort -o sample.sorted.bam sample.sorted.index
#samtools index sample.sorted.bam
#copy index to current working directory
cp /tmp/ND_ICG_2020_OCT_06/*.sorted* .
for feature in gff3_reader
if(GFF3.featuretype(feature) =="mRNA")
println(feature)
end
target = GFF3.initialize!
```
---
## October 8, 2020
Review/addressing errors from Tuesday
Log on to CRC
module load julia
julia
```
using BioAlignments
using GenomicFeatures
using DataStructures
using XAM
using GFF3
bam_reader = open(BAM.Reader, "sample.bam")
gff3_reader = open(GFF3.Reader, "sample.gff3")
for record in gff3_reader
println(record)
end
#get mRNAs from gff3
for record in gff3_reader
if(GFF3.featuretype(record) == "mRNA")
println(record)
end
end
#count mRNAs (similar to Salmon)
bam_reader = open(BAM.Reader, "sample.sorted.bam", index = "sample.sorted.index")
for feature in gff3_reader
for record in eachoverlap(bam_reader, feature)
println(record)
end
end
for feature in gff3_reader
if(GFF3.featuretype(feature) == "mRNA")
println(feature)
i=0
for record in eachoverlap(bam_reader, feature)
i=i+1
end
println(i)
end
end
#iterating through reader isn't working, trying to turn data into a dataframe to iterate instead
#add package DataFrames
using DataFrames
df = DataFrame(feature = GFF3.Reader, count = Int[])
for feature in gff3_reader
if(GFF3.featuretype(feature) =="mRNA")
i=0
for record in eachoverlap(bam_reader, feature)
i = i+1
end
push!(df, (feature,i))
end
end