# Lab 7
First made directory "lab_07"
'ls /courses/bi278/Course_Materials/lab_07/* > andi_inputs.txt' to prepare a file with all the genomes we want to build a phylogeny with
To run Andi with the file that was just prepared 'andi --file-of-filenames=andi_inputs.txt -t 2 --join > andi_output.dist'
'awk -F "\t" '$3=="n0" && $4!="" && $5!="" && $6!="" && $7!="" && $8!="" && $9!="" && $10!="" && $11!="" && $12!="" && $13!"" && $14!="" && $15!="" && $16!="" && $17!="" && $18!="" && $19!="" && $20!="" && $21!=""' /courses/bi278/Course_Materials/lab_07/orthofinder_18taxa/Phylogenetic_Hierarchical_Orthogroups/N0.tsv > 18taxa_coregenes.txt' (to identify orthogroups with genes present from each genome. I want all
the rows where orthologous groups share a common ancestor at node 0 (zero) and
each genome has at least one gene member in the group.)
Next, 'sed 's/, /\t/g' 18taxa_coregenes.txt | awk '{print $1, NF-1}'' to see how many genes are included in each orthogroup from above. If
there are only single copy genes, then the number of genes should be the same as
the number of genomes included in the orthofinder analysis (18).
'mafft --maxiterate 3 /courses/bi278/Course_Materials/lab_07/orthofinder_18taxa/Ortho
group_Sequences/OG0000084.fa > OG0000084_mafft.faa' to genterate a multiple sequence analysis for the chosen file using mafft
Then used the MSA as the input for iqtree 'iqtree -s OG0000084_mafft.faa'
Can then see the tree using 'cat OG0000084_mafft.faa.iqtree'
R Studio
library(ape) and library(phytools) to load phylogenetics packages
'andi.dist <- read.table("/home2/dkalde22/andi_output.dist", header=F, row.names=1, skip=1)")
AND colnames(andi.dist) <- rownames(andi.dist) to take the distance matrix generated by andi and
make a neighbor-joining tree from it.
Then 'andi.tree <- nj(as.dist(andi.dist))' to infer tree from distance matrix
then plotted tree using plot(andi.tree)
andi.tree$tip.label to check the labeels of the tree
to substitute patterns that have to do with method or source
'temp <- gsub(andi.tree$tip.label, pattern="_nanoassembly",
replacement="")'
'temp2 <- gsub(temp, pattern="_ncbi", replacement="")'
to directly substitute individual elements
temp2[1] <- "bh69"
to directly substitute ranges of elements
'temp2[15:18] <- c("Pfungorum", "Psprentiae", "Pterrae",
"Pxenovorans")'
this is the last thing to clean up that was inconsistent across names
'temp3 <- gsub(temp2, pattern="qs", replacement="")'
tp replace the original labels
andi.tree$tip.label <- temp3
to clean up unnecessary files
rm(temp, temp2, temp3)
To add node numbers = nodelabels()
'rooted.andi <- root.phylo(andi.tree, node=34, resolve.root=T)
plot(rooted.andi, cex=1, label.offset=0.005)' TO root the tree with the outgroup node
To look at the species tree output from orthofinder 'ortho.tree <- read.tree("/courses/bi278/Course_Materials/lab_07/orthofinder_18taxa/Species_Tree/SpeciesTree_rooted.txt")
plot(ortho.tree)' then plot
to fix tip labels ortho.tree$tip.label
temp <- gsub(ortho.tree$tip.label, pattern="_prokka",
replacement="")
temp2 <- gsub(temp, pattern="qs", replacement="")
ortho.tree$tip.label <- temp2
rm(temp, temp2)
To root the tree
plot(ortho.tree)
nodelabels()
rooted.ortho <-root.phylo(ortho.tree, node=25, resolve.root=T)
plot(rooted.ortho, cex=1, label.offset=0.005)'
then rotate the nodes of this tree so the tips match more closely the tips of the tree from andi. rotate.ortho <- rotateConstr(rooted.ortho,
rooted.andi$tip.label)
plot(rotate.ortho)
compare topology (branching pattern)
all.equal(andi.tree, ortho.tree, use.edge.length=FALSE)
plot both trees side by side
cophyloplot(rooted.andi, rotate.ortho)
To compare our gene trees now.
the tree I generated with IQ-TREE
iq.tree <-
read.tree("/home2/snoh/lab_07/OG0000084_mafft.faa.treefile")
check and root this tree
plot(iq.tree)
nodelabels()
rooted.iq84 <- root.phylo(iq.tree, node=33, resolve.root=T)
plot(rooted.iq84, cex=0.75, label.offset=0.005)
the gene tree from orthofinder
gene.tree <-
read.tree("/courses/bi278/Course_Materials/lab_07/orthofinder_1
8taxa/Gene_Trees/OG0000084_tree.txt")
check and root this tree too
plot(gene.tree)
nodelabels()
rooted.or84 <- root.phylo(gene.tree, node=55, resolve.root=T)
plot(rooted.or84, cex=0.75, label.offset=0.005)
rotate and compare
rotate.iq84 <- rotateConstr(rooted.iq84, rooted.or84$tip.label)
all.equal(rooted.or84, rooted.iq84, use.edge.length=FALSE)
cophyloplot(rooted.or84, rotate.iq84)
Saved and DONE