###### tags: `ノート` `TS` `解析` `single-cell RNA-seq`
# 22-11: ES由来セルトリ細胞scRNAseq解析 リバイズ
<br>
#### 2023/3/7
### 【 作業場所&保存場所 】
• 作業場所
~/osc-fs/22_TS_FTSLC_scRNAseq_2022/Seurat_Non_aggr
---
### 1. Nr5a1 遺伝子に絞ってFeaturePlot
[22-03: ES由来セルトリ細胞scRNAseq解析 その3](https://hackmd.io/@mae-shi/rJ6Gs_xvc) の、05_20220516_cl02.RDataのdata.integratedで、Nr5a1のfeatureplotを各サンプル毎で。
```r
setwd("~/osc-fs/22_TS_FTSLC_scRNAseq_2022/Seurat_Non_aggr")
library(Seurat)
load("05_20220516_cl02.RData")
# 確認
DimPlot(object = data.integrated , reduction = "umap" , label = TRUE , split.by = "sample")
# metadata確認
head(data.integrated@meta.data)
unique(data.integrated[["sample"]])
# Nr5a1でfeatureplot
FeaturePlot(object=data.integrated, features= "Nr5a1", col=c("gray", "red"), split.by="sample", min.cutoff=0, pt.size=1,order =T)
# min.cutoff=0 や、pt.size=0.5とpt.size=0.75もplot
```
※ order =T オプションをつけると陽性細胞が前面に表示されるらしい。
参照:[order引数をTRUE](https://zenn.dev/rchiji/articles/6671288e86b5dc#featureplot%E3%81%AE%E5%80%A4%E3%81%AE%E7%AF%84%E5%9B%B2)
<br>
min.cutoff=0.1, pt.size=1

min.cutoff=0, pt.size=0.75

min.cutoff=0, pt.size=0.5

<br>
<br>
### 2-1. Correlation Matrix (24 x 24のマトリックス)
[22-05:ES由来セルトリ細胞scRNAseq解析 第2弾 Sertoli 発現クラスターを抽出](https://hackmd.io/@mae-shi/rk-HG7Quc) の、data.integrated.gsc16_02 のデータをもとに、各サンプル(8サンプル)を3クラスターに分けたもので Correlation Matrix (24 x 24のマトリックス)。
```r
load("11_data.integrated.gsc16_02.RData")
#確認
DimPlot(object = data.integrated.gsc16_02 , reduction = "umap" , label = TRUE , split.by = "sample")
#metadata確認
head(data.integrated.gsc16_02@meta.data)
unique(data.integrated.gsc16_02[["sample"]])
# metadata追加
data.integrated.gsc16_02 @ meta.data $ sample_clust <- paste0(data.integrated.gsc16_02 @ meta.data $ sample , "-" , data.integrated.gsc16_02 @ meta.data $ seurat_clusters)
Idents(data.integrated.gsc16_02) <- data.integrated.gsc16_02 @ meta.data $ sample_clust
av.exp <- AverageExpression(data.integrated.gsc16_02)$integrated
cor.exp <- cor(av.exp)
hc <- hclust(dist(cor.exp))
breaks1 <- seq(0.97, 1, length=500) #0.96-0.98くらいで調整
mycols1 <- colorRamp2(breaks1,colorRampPalette(brewer.pal(9, "GnBu"))(500))
# ヒートマップ
ht1 <- Heatmap(cor.exp,
col = mycols1,
column_title = "Correlation Matrix of 1,6 cluster for gonadal somatic cells ",
column_title_gp = gpar(fontsize = 35),
name = "R", #title of legend,
row_names_gp = gpar(fontsize = 16),
column_names_gp = gpar(fontsize = 16),
column_names_rot = 90,
row_dend_width = unit(4, "cm"),
column_dend_height = unit(4, "cm"),
row_dend_side = "right",
column_dend_side = "bottom",
cluster_rows = hc,
cluster_columns = hc,
heatmap_height = unit(40, "cm"),
heatmap_width = unit(40, "cm"),
show_heatmap_legend = FALSE,
cell_fun = function(j, i, x, y, width, height, fill){
grid.rect(x = x, y = y, width = width, height = height, gp = gpar(col = "black", fill = NA))
}
)
# 凡例を別に設定
lgd = Legend(col_fun = mycols1, title = "R", legend_height = unit(6, "cm"))
# 描く
pdf("Correlation Matrix.pdf", height=20, width=20)
ht1
draw(lgd, x = unit(0.93, "npc"), y = unit(0.55, "npc"))
dev.off()
```

<br>
<br>
### 2-2. Top5遺伝子のHeatmap
[22-05:ES由来セルトリ細胞scRNAseq解析 第2弾 Sertoli 発現クラスターを抽出](https://hackmd.io/@mae-shi/rk-HG7Quc) の、data.integrated.gsc16_02 のデータをもとに、各サンプル(8サンプル)を3クラスターに分けたもので Heatmap(Top5遺伝子)。
```r
# 確認
head(data.integrated.gsc16_02@meta.data)
# sample_clust並べ替える
junban = c("XX_10.5-0", "XX_10.5-1", "XX_10.5-2", "XX_11.5-0", "XX_11.5-1", "XX_11.5-2", "XX_12.5-0", "XX_12.5-1", "XX_12.5-2", "XX_FOSLCs-0", "XX_FOSLCs-1", "XX_FOSLCs-2", "XY_10.5-0", "XY_10.5-1", "XY_10.5-2", "XY_11.5-0", "XY_11.5-1", "XY_11.5-2", "XY_12.5-0", "XY_12.5-1", "XY_12.5-2", "XY_FTSLCs-0", "XY_FTSLCs-1", "XY_FTSLCs-2")
Idents(data.integrated.gsc16_02) <- factor(Idents(data.integrated.gsc16_02), levels= junban)
# クラスター特異的遺伝の同定
integrated.markers <- FindAllMarkers(
object = data.integrated.gsc16_02,
only.pos = FALSE,
min.pct = 0.25,
logfc.threshold = 0.25,
test.use = "MAST")
# 計算に少し時間がかかるのでこのオブジェクトはsaveしておく
save(list=ls() , file="gsc16_02_integrated.markers_240322.RData")
integrated.markers %>%
filter(avg_log2FC >= 0) %>%
group_by(cluster) %>%
top_n(5, avg_log2FC) -> top5.posi
DoHeatmap(object = data.integrated.gsc16_02, features=top5.posi$gene, size=3)
```

<br>
<br>
### 2-3. XYサンプルのみで、Correlation MatrixとTop5遺伝子Heatmap
data.integrated.gsc16_02 データの XYサンプルのみ抽出し、で Correlation MatrixとHeatmap作成。
```r
# XYだけ抽出
data.integrated.gsc16_02_XY <- subset(data.integrated.gsc16_02, subset=sample==c("XY_10.5", "XY_11.5", "XY_12.5","XY_FTSLCs"))
# 抽出確認
unique(data.integrated.gsc16_02_XY[["sample"]])
DimPlot(object = data.integrated.gsc16_02_XY, label = TRUE, split.by = "sample")
save(data.integrated.gsc16_02_XY, file="240404_data.integrated.gsc16_02_XY.RData")
#Correlation matrix
#correlation
av.exp <- AverageExpression(data.integrated.gsc16_02_XY)$integrated
cor.exp <- cor(av.exp)
#クラスタリング
hc <- hclust(dist(cor.exp))
breaks1 <- seq(0.96, 1, length=500)
mycols1 <- colorRamp2(breaks1,colorRampPalette(brewer.pal(9, "GnBu"))(500))
#Heatmap
ht1 <- Heatmap(cor.exp,
col = mycols1,
column_title = "Correlation Matrix of 1,6 cluster for gonadal somatic cells in XY ",
column_title_gp = gpar(fontsize = 35),
name = "R", #title of legend,
row_names_gp = gpar(fontsize = 25),
column_names_gp = gpar(fontsize = 25),
column_names_rot = 90,
row_dend_width = unit(4, "cm"),
column_dend_height = unit(4, "cm"),
row_dend_side = "right",
column_dend_side = "bottom",
cluster_rows = hc,
cluster_columns = hc,
heatmap_height = unit(40, "cm"),
heatmap_width = unit(40, "cm"),
show_heatmap_legend = FALSE,
cell_fun = function(j, i, x, y, width, height, fill){
grid.rect(x = x, y = y, width = width, height = height, gp = gpar(col = "black", fill = NA))
}
)
# 凡例を別に設定
lgd = Legend(col_fun = mycols1, title = "R", title_gp = gpar(fontsize = 30),legend_height = unit(6, "cm"), grid_width = unit(1, "cm"), labels_gp = gpar(fontsize = 20))
# 描く
pdf("240404_CorrelationMatrix_gsc_16clust02res_XY_096.pdf", height=20, width=20)
ht1
draw(lgd, x = unit(0.93, "npc"), y = unit(0.55, "npc"))
dev.off()
# Heatmap
# sample_clust並べ替える
junban = c( "XY_10.5-0", "XY_10.5-1", "XY_10.5-2", "XY_11.5-0", "XY_11.5-1", "XY_11.5-2", "XY_12.5-0", "XY_12.5-1", "XY_12.5-2", "XY_FTSLCs-0", "XY_FTSLCs-1", "XY_FTSLCs-2")
Idents(data.integrated.gsc16_02_XY) <- factor(Idents(data.integrated.gsc16_02_XY), levels= junban)
# クラスター特異的遺伝の同定
integrated.markers <- FindAllMarkers(
object = data.integrated.gsc16_02_XY,
only.pos = FALSE,
min.pct = 0.25,
logfc.threshold = 0.25,
test.use = "MAST")
# 計算に少し時間がかかるのでこのオブジェクトはsaveしておく
save(integrated.markers , file="gsc16_02_XY_integrated.markers_240404.RData")
integrated.markers %>%
filter(avg_log2FC >= 0) %>%
group_by(cluster) %>%
top_n(5, avg_log2FC) -> top5.posi
#240404_top5_heatmap_gsc_16clust02res_XY.pdf (20x15)
DoHeatmap(object = data.integrated.gsc16_02_XY, features=top5.posi$gene) + theme(axis.text=element_text(size=10))
```

<br>
```r
# XY E12.5とFTSLCだけ抽出
data.integrated.gsc16_02_XY12.5_FTSLC <- subset(data.integrated.gsc16_02, subset=sample==c("XY_12.5","XY_FTSLCs"))
# 抽出確認
unique(data.integrated.gsc16_02_XY12.5_FTSLC[["sample"]])
DimPlot(object = data.integrated.gsc16_02_XY12.5_FTSLC, label = TRUE, split.by = "sample")
save(data.integrated.gsc16_02_XY12.5_FTSLC, file="240404_data.integrated.gsc16_02_XY12.5_FTSLC.RData")
#Correlation Matrix
#correlation
av.exp <- AverageExpression(data.integrated.gsc16_02_XY12.5_FTSLC)$integrated
cor.exp <- cor(av.exp)
#クラスタリング
hc <- hclust(dist(cor.exp))
breaks1 <- seq(0.97, 1, length=500)
mycols1 <- colorRamp2(breaks1,colorRampPalette(brewer.pal(9, "GnBu"))(500))
#Heatmap
ht1 <- Heatmap(cor.exp,
col = mycols1,
column_title = "Correlation Matrix of 1,6 cluster for gonadal somatic cells in XY12.5 and FTSLCs ",
column_title_gp = gpar(fontsize = 25),
name = "R", #title of legend,
row_names_gp = gpar(fontsize = 25),
column_names_gp = gpar(fontsize = 25),
column_names_rot = 90,
row_dend_width = unit(4, "cm"),
column_dend_height = unit(4, "cm"),
row_dend_side = "right",
column_dend_side = "bottom",
cluster_rows = hc,
cluster_columns = hc,
heatmap_height = unit(40, "cm"),
heatmap_width = unit(40, "cm"),
show_heatmap_legend = FALSE,
cell_fun = function(j, i, x, y, width, height, fill){
grid.rect(x = x, y = y, width = width, height = height, gp = gpar(col = "black", fill = NA))
}
)
# 凡例を別に設定
lgd = Legend(col_fun = mycols1, title = "R", title_gp = gpar(fontsize = 30),legend_height = unit(6, "cm"), grid_width = unit(1, "cm"), labels_gp = gpar(fontsize = 20))
# 描く
pdf("240404_CorrelationMatrix_gsc_16clust02res_XY12.5_FTSLC_097.pdf", height=20, width=20)
ht1
draw(lgd, x = unit(0.93, "npc"), y = unit(0.55, "npc"))
dev.off()
# Heatmap
# sample_clust並べ替える
junban = c("XY_12.5-0", "XY_12.5-1", "XY_12.5-2", "XY_FTSLCs-0", "XY_FTSLCs-1", "XY_FTSLCs-2")
Idents(data.integrated.gsc16_02_XY12.5_FTSLC) <- factor(Idents(data.integrated.gsc16_02_XY12.5_FTSLC), levels= junban)
# クラスター特異的遺伝の同定
integrated.markers <- FindAllMarkers(
object = data.integrated.gsc16_02_XY12.5_FTSLC,
only.pos = FALSE,
min.pct = 0.25,
logfc.threshold = 0.25,
test.use = "MAST")
# 計算に少し時間がかかるのでこのオブジェクトはsaveしておく
save(integrated.markers, file="gsc16_02_XY12.5_FTSLC_integrated.markers_240404.RData")
integrated.markers %>%
filter(avg_log2FC >= 0) %>%
group_by(cluster) %>%
top_n(5, avg_log2FC) -> top5.posi
#240404_top5_heatmap_gsc_16clust02res_XY12.5_FTSLC.pdf (20x15)
DoHeatmap(object = data.integrated.gsc16_02_XY12.5_FTSLC, features=top5.posi$gene) + theme(axis.text=element_text(size=10))
```

<br>
<br>
### 3. Nr5a1 Positive CellのBarPlot
Nr5a1のfeatureplotを各サンプルでNr5a1 positive cellの割合を棒グラフにしたい。
positive cellの定義は発現が0ではないもの。
#### ☆ data.integrated
```r
# Nr5a1スコア追記
Nr5a1_di <- AddModuleScore(object = data.integrated, features= "Nr5a1", name = "Nr5a1_score")
# 確認
summary(Nr5a1_di@meta.data$Nr5a1_score)
Min. 1st Qu. Median Mean 3rd Qu. Max.
-1.98562 -0.23570 -0.10885 -0.02739 0.04595 2.88657
# Nr5a1スコアが0より大きいもの抽出
Nr5a1_di_posi <- Nr5a1_di@meta.data[Nr5a1_di@meta.data[, "Nr5a1_score1"] > 0 , , drop = FALSE]
# Nr5a1スコアが0より大きいものの数を格納
Nr5a1_di_posi_counts <- table(Nr5a1_di_posi$sample)
# 確認
summary(Nr5a1_di_posi$Nr5a1_score1)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.000007 0.073988 0.302654 0.381799 0.600817 2.886568
# 各サンプルごとのposiの数
Nr5a1_di_posi_counts
XX_10.5 XX_11.5 XX_12.5 XX_FOSLCs XY_10.5 XY_11.5 XY_12.5 XY_FTSLCs
1463 766 839 2299 1132 760 890 703
# データフレームに変換
di_posi <- as.data.frame(Nr5a1_di_posi_counts)
colnames(di_posi) <- c("sample", "posi")
# Nr5a1スコアが0より大きいもののbarplot (240416_cl02_posi_barplot, 20 x 10)
ggplot(di_posi, aes(x = sample, y = posi)) + geom_col(fill="blue", colour="black") + labs(title="Nr5a1 Positive Cell Count", x="samples", y="positive cell counts") + geom_text(aes(label=posi), vjust=-1.5) + scale_y_continuous(limits=c(0,3000))
# 各サンプルごとの Nr5a1スコア
Nr5a1_di_counts <- table(Nr5a1_di$sample)
di_all <- as.data.frame(Nr5a1_di_counts)
colnames(di_all) <- c("sample", "all")
# 各サンプルごと細胞数
Nr5a1_di_counts
XX_10.5 XX_11.5 XX_12.5 XX_FOSLCs XY_10.5 XY_11.5 XY_12.5 XY_FTSLCs
5423 2482 2859 7562 3837 2101 2764 2441
# くっつける
di_data <- left_join(di_all, di_posi, by = "sample")
colnames(di_data) <- c("sample", "all", "posi")
# 各サンプルごとのpositive cellsの割合を計算し追記
di_data <- mutate(di_data, prop = (di_data$posi/di_data$all)*100)
# 四捨五入
di_data$prop <- round(di_data$prop, 1)
# 確認
> di_data
sample all posi prop
1 XX_10.5 5423 1463 27.0
2 XX_11.5 2482 766 30.9
3 XX_12.5 2859 839 29.3
4 XX_FOSLCs 7562 2299 30.4
5 XY_10.5 3837 1132 29.5
6 XY_11.5 2101 760 36.2
7 XY_12.5 2764 890 32.2
8 XY_FTSLCs 2441 703 28.8
# barplot (240416_di_posi_prop_barplot, 20 x 10)
ggplot(di_data, aes(x = sample, y = prop)) + geom_col(fill="blue", colour="black") + labs(title="Percentage of Nr5a1 positive cells", x="samples", y="%") + geom_text(aes(label=prop), vjust=-1.5) + scale_y_continuous(limits=c(0,50))
```

<br>
#### ☆ data.integrated.gsc16_02
```r
# Nr5a1スコア追記
Nr5a1_gsc <- AddModuleScore(object = data.integrated.gsc16_02, features= "Nr5a1", name = "Nr5a1_score")
# 確認
summary(Nr5a1_gsc@meta.data$Nr5a1_score)
# Nr5a1スコアが0より大きいもの抽出
Nr5a1_gsc_posi <- Nr5a1_gsc@meta.data[Nr5a1_gsc@meta.data[, "Nr5a1_score1"] > 0 , , drop = FALSE]
summary(Nr5a1_gsc_posi$Nr5a1_score1)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.001985 0.200387 0.369729 0.430746 0.594775 2.698969
# 確認
> Nr5a1_gsc_posi_counts
XX_10.5 XX_11.5 XX_12.5 XX_FOSLCs XY_10.5 XY_11.5 XY_12.5 XY_FTSLCs
21 354 506 999 36 317 328 250
# Nr5a1スコアが0より大きいものの数を格納しデータフレームに変換
Nr5a1_gsc_posi_counts <- table(Nr5a1_gsc_posi$sample)
gsc_posi <- as.data.frame(Nr5a1_gsc_posi_counts)
colnames(gsc_posi) <- c("sample", "posi")
# barplot (240416_gsc_posi_barplot, 20 x 10)
ggplot(gsc_posi, aes(x = sample, y = posi)) + geom_col(fill="blue", colour="black") + labs(title="Nr5a1 Positive Cell Count in gsc", x="samples", y="positive cell counts") + geom_text(aes(label=posi), vjust=-1.5) + scale_y_continuous(limits=c(0,1500))
# 各サンプルごとの Nr5a1スコア
Nr5a1_gsc_counts <- table(Nr5a1_gsc$sample)
gsc_all <- as.data.frame(Nr5a1_gsc_counts)
colnames(gsc_all) <- c("sample", "all")
# くっつける
gsc_data <- left_join(gsc_all, gsc_posi, by = "sample")
colnames(gsc_data) <- c("sample", "all", "posi")
# 各サンプルごとのpositive cellsの割合を計算し追記
gsc_data <- mutate(gsc_data, prop = (gsc_data$posi/gsc_data$all)*100)
# 四捨五入
gsc_data$prop <- round(gsc_data$prop, 1)
# 確認
> gsc_data
sample all posi prop
1 XX_10.5 415 21 5.1
2 XX_11.5 807 354 43.9
3 XX_12.5 1250 506 40.5
4 XX_FOSLCs 2265 999 44.1
5 XY_10.5 313 36 11.5
6 XY_11.5 748 317 42.4
7 XY_12.5 682 328 48.1
8 XY_FTSLCs 501 250 49.9
# barplot (240416_gsc_posi_prop_barplot, 20 x 10)
ggplot(gsc_data, aes(x = sample, y = prop)) + geom_col(fill="blue", colour="black") + labs(title="Percentage of Nr5a1 positive cells in gsc", x="samples", y="%") + geom_text(aes(label=prop), vjust=-1.5) + scale_y_continuous(limits=c(0,60))
```

<br>
```r
save(list=c("data.integrated", "data.integrated.gsc16_02", "data.integrated.gsc16_02_XY", "data.integrated.gsc16_02_XY12.5_FTSLC", "di_data", "gsc_data"),file="20240419.RData")
```
<br>
#### ☆ data.integratedのXYのみのbarplot
20240501
```r
load("20240419.RData")
# 確認
> di_data
sample all posi prop
1 XX_10.5 5423 1463 27.0
2 XX_11.5 2482 766 30.9
3 XX_12.5 2859 839 29.3
4 XX_FOSLCs 7562 2299 30.4
5 XY_10.5 3837 1132 29.5
6 XY_11.5 2101 760 36.2
7 XY_12.5 2764 890 32.2
8 XY_FTSLCs 2441 703 28.8
# XYのみ抽出
di_data_XY <- di_data[5:8,]
# 確認
> di_data_XY
sample all posi prop
5 XY_10.5 3837 1132 29.5
6 XY_11.5 2101 760 36.2
7 XY_12.5 2764 890 32.2
8 XY_FTSLCs 2441 703 28.8
# plot
ggplot(di_data_XY, aes(x = sample, y = prop))
+ geom_col(fill="blue", colour="black")
+ labs(title="Percentage of Nr5a1 positive cells in XY", x="samples", y="%")
+ geom_text(aes(label=prop), vjust=-1.5, size=5)
+ scale_y_continuous(limits=c(0,50))
+ theme(text = element_text(size=22))
```
