# Lab Exersize 1
## Mark Young & Emily Larson
```
## packages
library(borealis)
#setwd("/personal/mgyoun21/bi376")
create.tps(
input.filename = "Ofas.RNAi.nota.digitization.csv",
output.filename = "Ofas.RNAi.nota.tps",
id.factors = c('digitizer','treatment'),
include.scale = TRUE,
invert.scale = TRUE)
# read in shape data
shapes <- read.tps("Ofas.RNAi.nota.tps", keep.original.ids = TRUE)
# look at the names of the things in the shapes list
names(shapes)
#coord: array of coordinate shape data
#landmark.number: num of landmarks
#specimen.number: num of specimens
#scaled: logical value stating whether coordinates have been scaled
#metadata: df of sample meta
#provenance: the data prov
names(shapes$provenance)
cat(shapes$provenance$read.tps)
# plot a shape - default to number 1
landmark.plot(shapes)
landmark.plot(shapes, specimen.number = 2)
landmark.plot(shapes$coords[,,3])
# connect the landmarks in the plot w/ a matrix of connections
nota.lines <- matrix(c(1,2, 2,3, 3,4, 4,5, 5,6, 6,7, 7,8, 8,1, 2,6),
ncol = 2, byrow = TRUE)
landmark.plot(shapes, links = nota.lines)
# some of the landmarks are flipped, flip back
shapes <- align.reflect(shapes, links = nota.lines)
# run GPA so that we can compare shapes
# also do sum outlier analysis
# kept all outliers
shapes.gpa <- align.procrustes(shapes, outlier.analysis = TRUE)
# see plot of all procrustes aligned coordinates
plot(shapes.gpa$gpagen)
# look at centroid size
boxplot(shapes.gpa$Csize ~ shapes.gpa$metadata$treatment)
# some dont have centroids and appear massive
# toss them
x <- shapes.gpa$Csize > 500
shapes.gpa$Csize[x] <- NA
# remove treatment typos
unique(shapes.gpa$metadata$treatment)
x <- shapes.gpa$metadata$treatment == "vr"
shapes.gpa$metadata$treatment[x] <- "vg"
x <- shapes.gpa$metadata$treatment == "wildtype"
shapes.gpa$metadata$treatment[x] <- "wt"
unique(shapes.gpa$metadata$treatment)
boxplot(shapes.gpa$Csize ~ shapes.gpa$metadata$treatment)
# covert the gpagen and metadata elements of
# gpa list to geomorph.data.frame
shapes.gpa <- listed.gdf(shapes.gpa)
names(shapes.gpa)
# pre-processing done, generate a report on data prov
write.provenance(
shapes.gpa,
output.filename = "Ofas.nota.shape.provenance.md",
title = "Milkweed bug nota shape data provenance"
)
# Use Kendall's tangent space projection to find variance
# in shapes. this is similar to PCA
shapes.pca <- gm.prcomp(shapes.gpa$gdf$coords)
summary(shapes.pca)
# plot the PCA
plot(shapes.pca)
# check effect of digitizer
plot(shapes.pca, col = as.factor(shapes.gpa$gdf$digitizer))
# now check treatment
plot(shapes.pca, col = as.factor(shapes.gpa$gdf$treatment))
# make a gMMplot
# outline of each group surrounded by convex hull
ggGMMplot(shapes.pca,
group = shapes.gpa$gdf$treatment,
group.title = 'treatment',
convex.hulls = TRUE,
include.legend = TRUE)
# add backtransformations to see what
# the PCs actually do
```

## Problem 1
```
ggGMMplot(shapes.pca,
group = shapes.gpa$gdf$treatment,
group.title = 'treatment',
convex.hulls = TRUE,
backtransform.examples = TRUE,
ref.shape = mshape(shapes.gpa$gdf$coords),
bt.links = nota.lines)
```

## Challenge 1
```
ggGMMplot(shapes.pca,
group = shapes.gpa$gdf$treatment,
group.title = 'treatment',
convex.hulls = TRUE,
backtransform.examples = TRUE,
ref.shape = mshape(shapes.gpa$gdf$coords),
bt.links = nota.lines,
bt.margin.factor = 5,
bt.legend.position = c(.7,1)
)
```

# Part 2: Pupfish
```
data("pupfish", package="geomorph")
# remove outliers and explore data, conduct GPA
# there are 56 points per pupfish
dim(pupfish$coords[,,1])
# make the lines thing
pup.lines <- c()
for (i in c(1:55)){
pup.lines <- c(pup.lines, i, i+1)
}
pup.lines <- matrix(pup.lines, ncol = 2, byrow = TRUE)
landmark.plot(pupfish, links = pup.lines)
pupfish <- align.reflect(pupfish, links = pup.lines)
pupfish$metadata <- data.frame('Sex' = pupfish$Sex, 'Population' = pupfish$Pop)
# create the gpa
# removed 1 outlier
pupfish.gpa <- align.procrustes(pupfish, outlier.analysis = TRUE)
# get rid of point 22 in the metdata
pupfish.gpa$metadata <- pupfish.gpa$metadata[-c(22),]
# look at centroids - no clear outliers
boxplot(pupfish.gpa$CS ~ pupfish.gpa$Sex)
boxplot(pupfish.gpa$CS ~ pupfish.gpa$Pop)
# create a shape space plot that highlights sex and or population
pupfish.gpa <- listed.gdf(pupfish.gpa)
pupfish.pca <- gm.prcomp(pupfish.gpa$gdf$coords)
ggGMMplot(pupfish.pca,
group = pupfish.gpa$gdf$Sex,
group.title = 'Sex',
convex.hulls = TRUE,
include.legend = TRUE,
backtransform.examples = TRUE,
ref.shape = mshape(pupfish.gpa$gdf$coords),
bt.links = pup.lines,
target.pt.size = 0.1,
ref.pt.size = 0.1,
)
```

```
# all sex x population combinations
pupfish.gpa$gdf$SexPop <- paste(pupfish.gpa$gdf$Sex, pupfish.gpa$gdf$Population)
ggGMMplot(pupfish.pca,
group = pupfish.gpa$gdf$SexPop,
group.title = 'Sex x Population',
convex.hulls = TRUE,
include.legend = TRUE,
backtransform.examples = TRUE,
ref.shape = mshape(pupfish.gpa$gdf$coords),
bt.links = pup.lines,
target.pt.size = 0.1,
ref.pt.size = 0.1,
)
```

The sinkhole Males have the largest shape variation.