# Lab 7 First made directory "lab_07" 'ls /courses/bi278/Course_Materials/lab_07/* > andi_inputs.txt' to prepare a file with all the genomes we want to build a phylogeny with To run Andi with the file that was just prepared 'andi --file-of-filenames=andi_inputs.txt -t 2 --join > andi_output.dist' 'awk -F "\t" '$3=="n0" && $4!="" && $5!="" && $6!="" && $7!="" && $8!="" && $9!="" && $10!="" && $11!="" && $12!="" && $13!"" && $14!="" && $15!="" && $16!="" && $17!="" && $18!="" && $19!="" && $20!="" && $21!=""' /courses/bi278/Course_Materials/lab_07/orthofinder_18taxa/Phylogenetic_Hierarchical_Orthogroups/N0.tsv > 18taxa_coregenes.txt' (to identify orthogroups with genes present from each genome. I want all the rows where orthologous groups share a common ancestor at node 0 (zero) and each genome has at least one gene member in the group.) Next, 'sed 's/, /\t/g' 18taxa_coregenes.txt | awk '{print $1, NF-1}'' to see how many genes are included in each orthogroup from above. If there are only single copy genes, then the number of genes should be the same as the number of genomes included in the orthofinder analysis (18). 'mafft --maxiterate 3 /courses/bi278/Course_Materials/lab_07/orthofinder_18taxa/Ortho group_Sequences/OG0000084.fa > OG0000084_mafft.faa' to genterate a multiple sequence analysis for the chosen file using mafft Then used the MSA as the input for iqtree 'iqtree -s OG0000084_mafft.faa' Can then see the tree using 'cat OG0000084_mafft.faa.iqtree' R Studio library(ape) and library(phytools) to load phylogenetics packages 'andi.dist <- read.table("/home2/dkalde22/andi_output.dist", header=F, row.names=1, skip=1)") AND colnames(andi.dist) <- rownames(andi.dist) to take the distance matrix generated by andi and make a neighbor-joining tree from it. Then 'andi.tree <- nj(as.dist(andi.dist))' to infer tree from distance matrix then plotted tree using plot(andi.tree) andi.tree$tip.label to check the labeels of the tree to substitute patterns that have to do with method or source 'temp <- gsub(andi.tree$tip.label, pattern="_nanoassembly", replacement="")' 'temp2 <- gsub(temp, pattern="_ncbi", replacement="")' to directly substitute individual elements temp2[1] <- "bh69" to directly substitute ranges of elements 'temp2[15:18] <- c("Pfungorum", "Psprentiae", "Pterrae", "Pxenovorans")' this is the last thing to clean up that was inconsistent across names 'temp3 <- gsub(temp2, pattern="qs", replacement="")' tp replace the original labels andi.tree$tip.label <- temp3 to clean up unnecessary files rm(temp, temp2, temp3) To add node numbers = nodelabels() 'rooted.andi <- root.phylo(andi.tree, node=34, resolve.root=T) plot(rooted.andi, cex=1, label.offset=0.005)' TO root the tree with the outgroup node To look at the species tree output from orthofinder 'ortho.tree <- read.tree("/courses/bi278/Course_Materials/lab_07/orthofinder_18taxa/Species_Tree/SpeciesTree_rooted.txt") plot(ortho.tree)' then plot to fix tip labels ortho.tree$tip.label temp <- gsub(ortho.tree$tip.label, pattern="_prokka", replacement="") temp2 <- gsub(temp, pattern="qs", replacement="") ortho.tree$tip.label <- temp2 rm(temp, temp2) To root the tree plot(ortho.tree) nodelabels() rooted.ortho <-root.phylo(ortho.tree, node=25, resolve.root=T) plot(rooted.ortho, cex=1, label.offset=0.005)' then rotate the nodes of this tree so the tips match more closely the tips of the tree from andi. rotate.ortho <- rotateConstr(rooted.ortho, rooted.andi$tip.label) plot(rotate.ortho) compare topology (branching pattern) all.equal(andi.tree, ortho.tree, use.edge.length=FALSE) plot both trees side by side cophyloplot(rooted.andi, rotate.ortho) To compare our gene trees now. the tree I generated with IQ-TREE iq.tree <- read.tree("/home2/snoh/lab_07/OG0000084_mafft.faa.treefile") check and root this tree plot(iq.tree) nodelabels() rooted.iq84 <- root.phylo(iq.tree, node=33, resolve.root=T) plot(rooted.iq84, cex=0.75, label.offset=0.005) the gene tree from orthofinder gene.tree <- read.tree("/courses/bi278/Course_Materials/lab_07/orthofinder_1 8taxa/Gene_Trees/OG0000084_tree.txt") check and root this tree too plot(gene.tree) nodelabels() rooted.or84 <- root.phylo(gene.tree, node=55, resolve.root=T) plot(rooted.or84, cex=0.75, label.offset=0.005) rotate and compare rotate.iq84 <- rotateConstr(rooted.iq84, rooted.or84$tip.label) all.equal(rooted.or84, rooted.iq84, use.edge.length=FALSE) cophyloplot(rooted.or84, rotate.iq84) Saved and DONE