# Lab Exersize 1 ## Mark Young & Emily Larson ``` ## packages library(borealis) #setwd("/personal/mgyoun21/bi376") create.tps( input.filename = "Ofas.RNAi.nota.digitization.csv", output.filename = "Ofas.RNAi.nota.tps", id.factors = c('digitizer','treatment'), include.scale = TRUE, invert.scale = TRUE) # read in shape data shapes <- read.tps("Ofas.RNAi.nota.tps", keep.original.ids = TRUE) # look at the names of the things in the shapes list names(shapes) #coord: array of coordinate shape data #landmark.number: num of landmarks #specimen.number: num of specimens #scaled: logical value stating whether coordinates have been scaled #metadata: df of sample meta #provenance: the data prov names(shapes$provenance) cat(shapes$provenance$read.tps) # plot a shape - default to number 1 landmark.plot(shapes) landmark.plot(shapes, specimen.number = 2) landmark.plot(shapes$coords[,,3]) # connect the landmarks in the plot w/ a matrix of connections nota.lines <- matrix(c(1,2, 2,3, 3,4, 4,5, 5,6, 6,7, 7,8, 8,1, 2,6), ncol = 2, byrow = TRUE) landmark.plot(shapes, links = nota.lines) # some of the landmarks are flipped, flip back shapes <- align.reflect(shapes, links = nota.lines) # run GPA so that we can compare shapes # also do sum outlier analysis # kept all outliers shapes.gpa <- align.procrustes(shapes, outlier.analysis = TRUE) # see plot of all procrustes aligned coordinates plot(shapes.gpa$gpagen) # look at centroid size boxplot(shapes.gpa$Csize ~ shapes.gpa$metadata$treatment) # some dont have centroids and appear massive # toss them x <- shapes.gpa$Csize > 500 shapes.gpa$Csize[x] <- NA # remove treatment typos unique(shapes.gpa$metadata$treatment) x <- shapes.gpa$metadata$treatment == "vr" shapes.gpa$metadata$treatment[x] <- "vg" x <- shapes.gpa$metadata$treatment == "wildtype" shapes.gpa$metadata$treatment[x] <- "wt" unique(shapes.gpa$metadata$treatment) boxplot(shapes.gpa$Csize ~ shapes.gpa$metadata$treatment) # covert the gpagen and metadata elements of # gpa list to geomorph.data.frame shapes.gpa <- listed.gdf(shapes.gpa) names(shapes.gpa) # pre-processing done, generate a report on data prov write.provenance( shapes.gpa, output.filename = "Ofas.nota.shape.provenance.md", title = "Milkweed bug nota shape data provenance" ) # Use Kendall's tangent space projection to find variance # in shapes. this is similar to PCA shapes.pca <- gm.prcomp(shapes.gpa$gdf$coords) summary(shapes.pca) # plot the PCA plot(shapes.pca) # check effect of digitizer plot(shapes.pca, col = as.factor(shapes.gpa$gdf$digitizer)) # now check treatment plot(shapes.pca, col = as.factor(shapes.gpa$gdf$treatment)) # make a gMMplot # outline of each group surrounded by convex hull ggGMMplot(shapes.pca, group = shapes.gpa$gdf$treatment, group.title = 'treatment', convex.hulls = TRUE, include.legend = TRUE) # add backtransformations to see what # the PCs actually do ``` ![](https://i.imgur.com/JYbSDt2.png) ## Problem 1 ``` ggGMMplot(shapes.pca, group = shapes.gpa$gdf$treatment, group.title = 'treatment', convex.hulls = TRUE, backtransform.examples = TRUE, ref.shape = mshape(shapes.gpa$gdf$coords), bt.links = nota.lines) ``` ![](https://i.imgur.com/Gfepyjp.png) ## Challenge 1 ``` ggGMMplot(shapes.pca, group = shapes.gpa$gdf$treatment, group.title = 'treatment', convex.hulls = TRUE, backtransform.examples = TRUE, ref.shape = mshape(shapes.gpa$gdf$coords), bt.links = nota.lines, bt.margin.factor = 5, bt.legend.position = c(.7,1) ) ``` ![](https://i.imgur.com/gkzN9dO.png) # Part 2: Pupfish ``` data("pupfish", package="geomorph") # remove outliers and explore data, conduct GPA # there are 56 points per pupfish dim(pupfish$coords[,,1]) # make the lines thing pup.lines <- c() for (i in c(1:55)){ pup.lines <- c(pup.lines, i, i+1) } pup.lines <- matrix(pup.lines, ncol = 2, byrow = TRUE) landmark.plot(pupfish, links = pup.lines) pupfish <- align.reflect(pupfish, links = pup.lines) pupfish$metadata <- data.frame('Sex' = pupfish$Sex, 'Population' = pupfish$Pop) # create the gpa # removed 1 outlier pupfish.gpa <- align.procrustes(pupfish, outlier.analysis = TRUE) # get rid of point 22 in the metdata pupfish.gpa$metadata <- pupfish.gpa$metadata[-c(22),] # look at centroids - no clear outliers boxplot(pupfish.gpa$CS ~ pupfish.gpa$Sex) boxplot(pupfish.gpa$CS ~ pupfish.gpa$Pop) # create a shape space plot that highlights sex and or population pupfish.gpa <- listed.gdf(pupfish.gpa) pupfish.pca <- gm.prcomp(pupfish.gpa$gdf$coords) ggGMMplot(pupfish.pca, group = pupfish.gpa$gdf$Sex, group.title = 'Sex', convex.hulls = TRUE, include.legend = TRUE, backtransform.examples = TRUE, ref.shape = mshape(pupfish.gpa$gdf$coords), bt.links = pup.lines, target.pt.size = 0.1, ref.pt.size = 0.1, ) ``` ![](https://i.imgur.com/lw3yGz8.png) ``` # all sex x population combinations pupfish.gpa$gdf$SexPop <- paste(pupfish.gpa$gdf$Sex, pupfish.gpa$gdf$Population) ggGMMplot(pupfish.pca, group = pupfish.gpa$gdf$SexPop, group.title = 'Sex x Population', convex.hulls = TRUE, include.legend = TRUE, backtransform.examples = TRUE, ref.shape = mshape(pupfish.gpa$gdf$coords), bt.links = pup.lines, target.pt.size = 0.1, ref.pt.size = 0.1, ) ``` ![](https://i.imgur.com/4h7bNoc.png) The sinkhole Males have the largest shape variation.