# PanglaoDB ve TidySeurat
Kolon örnekleri seçildi. Kolon örneklerinden Fibroblast, Goblet cells, Epithelial cells, Cholangiocytes hücreleri seçildi.
```{r}
library(rPanglaoDB)
library(tidyverse)
library(dplyr)
library(Seurat)
library(SeuratObject)
```
```{r}
# Total number of cells per tissue for human 10x chromium samples
# ("toplam" = "total").
getSampleList() %>%
  filter(Species == "Homo sapiens", Protocol == "10x chromium") %>%
  group_by(Tissue) %>%
  summarize(toplam = sum(as.integer(Cells)))
```
```{r}
# Human liver samples sequenced with 10x chromium, with Cells as an integer.
getSampleList() %>%
  filter(
    Species == "Homo sapiens",
    Tissue == "Liver",
    Protocol == "10x chromium"
  ) %>%
  mutate(Cells = as.integer(Cells))
```
```{r}
# SRS accessions of the human liver samples sequenced with 10x chromium.
liver_samples <- getSampleList() %>%
  filter(
    Species == "Homo sapiens",
    Tissue == "Liver",
    Protocol == "10x chromium"
  ) %>%
  mutate(Cells = as.integer(Cells)) %>%
  pull(SRS)
```
```{r}
# Count the cells of each annotated type in one PanglaoDB sample.
get_cell_count <- function(SAMPLE) {
  sample_data <- getSamples(srs = SAMPLE)
  tidyseurat::count(sample_data, CellTypes)
}

# One row per (sample, cell type); the sample ID ends up in column "srs".
purrr::map_df(purrr::set_names(liver_samples), get_cell_count, .id = "srs")
```
```{r}
# SRS accessions of the human colon samples sequenced with 10x chromium.
colon_samples <- getSampleList() %>%
  filter(
    Species == "Homo sapiens",
    Tissue == "Colon",
    Protocol == "10x chromium"
  ) %>%
  mutate(Cells = as.integer(Cells)) %>%
  pull(SRS)
```
```{r}
# Count the cells of each annotated type in one PanglaoDB sample.
get_cell_count <- function(SAMPLE) {
  sample_data <- getSamples(srs = SAMPLE)
  tidyseurat::count(sample_data, CellTypes)
}

# Cell-type counts for every colon sample; the sample ID is stored in "srs".
colon_counts <- purrr::map_df(
  purrr::set_names(colon_samples),
  get_cell_count,
  .id = "srs"
)
```
```{r}
# Total cells per cell type across all colon samples ("toplam" = "total").
summarise(group_by(colon_counts, CellTypes), toplam = sum(n))
```
```{r}
# Print the per-sample cell-type counts.
colon_counts
```
```{r}
# "deneme" = "trial": fetch two datasets for experimentation. The IDs are
# passed as the first positional argument of getSamples().
deneme <- getSamples("SRA703206")
deneme_2 <- getSamples("SRA728025")
```
```{r}
# Last rows of the trial dataset, viewed as a tibble.
tail(tidyseurat::as_tibble(deneme))
```
```{r}
# Print the colon sample accessions.
colon_samples
```
```{r}
# NOTE(review): this call passes no data object to join_features(); it looks
# like a leftover fragment. The piped form appears in the following chunk.
tidyseurat::join_features(all = T)
```
```{r}
# Fetch one sample and join feature expression onto its per-cell table,
# requesting all features (all = TRUE).
deneme_3 <- getSamples(srs = "SRS3454423")
deneme_3_features <- deneme_3 %>%
  tidyseurat::join_features(all = TRUE)
# https://github.com/satijalab/seurat/issues/3715
```
# Working code 2022-05-26
```{r}
library(rPanglaoDB)
library(tidyverse)
library(dplyr)
library(Seurat)
library(SeuratObject)
```
```{r}
# Sample table (not just the IDs) of human colon samples from 10x chromium.
colon_samples <- getSampleList() %>%
  filter(
    Species == "Homo sapiens",
    Tissue == "Colon",
    Protocol == "10x chromium"
  )
colon_samples
```
```{r}
# Unique SRS accessions of the colon samples, in order of first appearance.
colon_srs <- unique(pull(colon_samples, SRS))
colon_srs
```
```{r}
# Cell types targeted by the analysis (matched against the CellTypes column).
cells_of_interest <- c("Fibroblasts", "Goblet cells", "Epithelial cells", "Cholangiocytes")
```
| Cell type | Count |
|-----------|-------|
| Fibroblasts | 6136 |
| Goblet cells| 3201 |
| Epithelial cells | 5982 |
| Cholangiocytes | 11541 |
```{r}
# Count the cells of each annotated type in one PanglaoDB sample.
get_cell_count <- function(SAMPLE) {
  getSamples(srs = SAMPLE) %>%
    tidyseurat::count(CellTypes)
}

# Iterate over the SRS accessions (colon_srs), not over colon_samples: in this
# section colon_samples is the full sample table, so mapping over it would
# walk its columns instead of the sample IDs.
colon_counts <- colon_srs %>%
  purrr::set_names() %>%
  purrr::map_df(get_cell_count, .id = "srs")
```
```{r eval=FALSE}
# Download every colon sample and cache it as "<SRS>.rds" in the working
# directory. walk() is used because only the side effect (the file) matters.
colon_srs %>%
  walk(~ saveRDS(getSamples(srs = .x), paste0(.x, ".rds")))
```
```{r}
# Tally the cell types of one cached sample (read from "<SRS>.rds").
get_cell_type <- function(SRS) {
  cached <- readRDS(paste0(SRS, ".rds"))
  count(tidyseurat::as_tibble(cached), CellTypes)
}

# Cell types with more than 3000 cells in total across all colon samples,
# most abundant first; rows containing NA are dropped.
cells_of_interest2 <- map_df(colon_srs, get_cell_type) %>%
  group_by(CellTypes) %>%
  summarize(total = sum(n)) %>%
  arrange(desc(total)) %>%
  filter(total > 3000) %>%
  drop_na() %>%
  pull(CellTypes)
```{r}
# Per-cell table of one cached sample, restricted to the cell types listed in
# cells_of_interest2.
extract_data <- function(SRS) {
  cell_tbl <- tidyseurat::as_tibble(readRDS(paste0(SRS, ".rds")))
  # mutate(CellTypes = str_trim(CellTypes)) %>%
  filter(cell_tbl, CellTypes %in% cells_of_interest2)
  # filter(CellTypes == "Epithelial cells")
}

# Number of cells of each selected type, pooled over all colon samples.
cell_of_interest_count <- colon_srs %>%
  map_df(extract_data) %>%
  count(CellTypes)
cell_of_interest_count
```{r}
# Barcodes of the cells in sample SRS3296611 whose cell type appears in
# cell_of_interest_count. The join key is stated explicitly so the match does
# not depend on whichever columns the two tables happen to share.
cells_of_interest_barcodes <- readRDS("SRS3296611.rds") %>%
  tidyseurat::inner_join(cell_of_interest_count, by = "CellTypes") %>%
  pull(.cell)
```
```{r}
# Cell-type counts of sample SRS3296611, joined on CellTypes with the counts
# pooled over all colon samples (cell_of_interest_count).
readRDS("SRS3296611.rds") %>%
tidyseurat::count(CellTypes) %>%
tidyseurat::inner_join(cell_of_interest_count, by="CellTypes")
```
```{r}
# Assay matrix of sample SRS3296611, taken with GetAssayData()'s default slot.
# Author's note: slot="counts" gave low numbers.
count_matrix <- GetAssayData(object = readRDS("SRS3296611.rds"))
```
```{r}
# deneme <- colnames(count_matrix)[1:10]
# Peek at the first 5 rows and 5 columns of the matrix.
count_matrix[1:5,1:5]
```
```{r}
# Column positions of the barcodes of interest in the assay matrix; barcodes
# that are absent from the matrix are dropped.
all_cells <- colnames(count_matrix)
locations_of_barcodes <- match(cells_of_interest_barcodes, all_cells)
locations_of_barcodes <- locations_of_barcodes[!is.na(locations_of_barcodes)]

# drop = FALSE keeps a matrix even if only one barcode is selected.
SRS3296611_coi_extract <- count_matrix[, locations_of_barcodes, drop = FALSE]
saveRDS(SRS3296611_coi_extract, "SRS3296611_coi_extract.rds")

# Per-gene maximum and standard deviation, keyed by gene name so that the two
# tables are joined gene by gene rather than on their numeric values.
df1 <- apply(SRS3296611_coi_extract, 1, max) %>%
  as_tibble(rownames = "genes") %>%
  rename(max = value)
df2 <- apply(SRS3296611_coi_extract, 1, sd) %>%
  as_tibble(rownames = "genes") %>%
  rename(sd = value)
inner_join(df1, df2, by = "genes") # next: sort by max and sd, slice(1:1024)
```
**01.06.2022**
```{r}
# Per-gene maximum (df5) and standard deviation (df4) of the sample extract.
df5 <- as_tibble(apply(SRS3296612_coi_extract, 1, max), rownames = "genes")
df4 <- as_tibble(apply(SRS3296612_coi_extract, 1, sd), rownames = "genes")

# The 2048 genes with the largest standard deviation.
df4 %>%
  arrange(desc(value)) %>%
  slice(1:2048)

# Histogram of log(max) for those genes (value.x = max, value.y = sd).
inner_join(df5, df4, by = "genes") %>%
  mutate(value.x = log(value.x)) %>%
  arrange(desc(value.y)) %>%
  slice(1:2048) %>%
  ggplot(aes(value.x)) +
  geom_histogram()
```
```r=
library(tidyverse)
library(tidyseurat)
# Rank the genes of one cached sample by their variability across the cells
# of interest.
#
# SRS:               sample accession; the Seurat object is read from
#                    "<PATH>/<SRS>.rds".
# PATH:              directory holding the cached .rds files; the histogram
#                    is written there as "<SRS>.png".
# cells_of_interest: values of CellTypes to keep.
# n_genes:           number of top-sd genes to return.
#
# Returns a tibble with columns genes, max, sd and log_max, ordered by
# decreasing sd and truncated to n_genes rows.
get_gene_range_per_srs <- function(
    SRS,
    PATH,
    cells_of_interest = c(
      "Fibroblasts", "Goblet cells", "Epithelial cells", "Cholangiocytes"
    ),
    n_genes = 2048) {
  seurat_data <- readRDS(file.path(PATH, paste0(SRS, ".rds")))

  # Barcodes of the cells annotated with one of the requested cell types.
  cells_of_interest_barcodes <- seurat_data %>%
    filter(CellTypes %in% cells_of_interest) %>%
    pull(.cell)

  count_matrix <- GetAssayData(object = seurat_data)
  locations_of_barcodes <- match(
    cells_of_interest_barcodes,
    colnames(count_matrix)
  )
  # drop = FALSE keeps a matrix even when a single cell is selected, so the
  # row-wise apply() calls below still work.
  coi_extract <- count_matrix[, locations_of_barcodes, drop = FALSE]

  # Per-gene maximum and standard deviation across the selected cells.
  df1 <- apply(coi_extract, 1, max) %>%
    as_tibble(rownames = "genes") %>%
    rename(max = value)
  df2 <- apply(coi_extract, 1, sd) %>%
    as_tibble(rownames = "genes") %>%
    rename(sd = value)

  data <- inner_join(df1, df2, by = "genes") %>%
    mutate(log_max = log(max)) %>%
    arrange(desc(sd)) %>%
    slice_head(n = n_genes)

  # Side effect: save a histogram of log(max) next to the cached sample.
  plot <- data %>%
    ggplot(aes(log_max)) +
    geom_histogram()
  ggsave(file.path(PATH, paste0(SRS, ".png")), plot)

  data
}
```
Colon SRS Listesi
"SRS3296611", "SRS3296612", "SRS3454422", "SRS3454423", "SRS3454424", "SRS3454425", "SRS3454426", "SRS3454427", "SRS3454428", "SRS3454430"
```r=
srs_list <- c("SRS3296612", "SRS3296611", "SRS3454430", "SRS3454428")
# Per-sample top-gene tables stacked into one tibble; the sample ID is stored
# in column "srs". The cached .rds files are read from the current directory.
four_srs <- map_df(
  set_names(srs_list),
  get_gene_range_per_srs,
  PATH = ".",
  .id = "srs"
)
```
Tek bir SRS için gen dataframe;
```{r}
#srs_list<- c("SRS3454426")
#four_srs <- srs_list %>% set_names() %>% map_df(~ get_gene_range_per_srs(.x, "."), .id = "srs" )
```
```r=
# Rank genes by decreasing sd within each sample, spread the ranks into one
# column per sample, and order the genes by the sum of their ranks.
four_srs %>%
group_by(srs) %>%
mutate(rank=min_rank(desc(sd))) %>%
select(-max, -sd, -log_max) %>%
pivot_wider(names_from = srs, values_from = rank) %>%
rowwise(genes) %>%
# sum() is called without na.rm here, so a gene missing from any sample's
# table gets an NA overall_rank.
mutate(overall_rank = sum(c_across(starts_with("SRS")))) %>%
arrange(overall_rank)
```
Soru: "SRS3296611_coi_extract" adlı matriste varyasyon gösteren satırları (yani genleri) ilkel (örn. StdDev) veya modern yöntemlerle belirleyip 1024 gen bulalım.
```{r}
# Consensus ranking across samples: rank genes by decreasing sd within each
# sample, spread the ranks into one column per sample, and keep the 1024
# genes with the lowest summed rank.
four_srs %>%
  group_by(srs) %>%
  mutate(rank = min_rank(desc(sd))) %>%
  select(-max, -sd, -log_max) %>%
  pivot_wider(names_from = srs, values_from = rank) %>%
  # A gene absent from a sample's table is given rank 1024 for that sample.
  # NOTE(review): per-sample ranks can exceed 1024, so confirm that this fill
  # value is the intended penalty for a missing gene.
  mutate(across(starts_with("SRS"), ~ replace_na(.x, 1024))) %>%
  rowwise(genes) %>%
  mutate(overall_rank = sum(c_across(starts_with("SRS")), na.rm = TRUE)) %>%
  ungroup() %>%
  arrange(overall_rank) %>%
  # drop_na(overall_rank) %>%
  slice(1:1024)
```
Burada bahsettiğiniz genlerle dataframe halinde oluşturmaya çalıştık.
```{r}
# Gene names from every per-sample top-gene table; a gene selected in several
# samples appears once per sample.
gene_of_interest <- four_srs %>%
  pull(genes)
gene_of_interest
```

```{r}
# Consensus gene table: rank genes by decreasing sd within each sample, keep
# only the genes that have a rank in every sample, and take the 784 genes
# with the lowest summed rank.
table_goi <- four_srs %>%
  group_by(srs) %>%
  mutate(rank = min_rank(desc(sd))) %>%
  select(-max, -sd, -log_max) %>%
  pivot_wider(names_from = srs, values_from = rank) %>%
  rowwise() %>%
  # Var = number of samples in which the gene has no rank.
  mutate(Var = sum(is.na(c_across(starts_with("SRS"))))) %>%
  ungroup() %>%
  mutate(across(starts_with("SRS"), ~ replace_na(.x, 1024))) %>%
  rowwise(genes) %>%
  mutate(overall_rank = sum(c_across(starts_with("SRS")), na.rm = TRUE)) %>%
  ungroup() %>%
  filter(Var < 1) %>%
  arrange(overall_rank) %>%
  slice(1:784)
```
```{r}
# Gene names of the consensus table, in the row order of table_goi.
goi_list <- table_goi %>%
  pluck("genes")
goi_list
```
```{r}
# Consensus gene table: rank genes by decreasing sd within each sample, keep
# only the genes that have a rank in every sample, and take the 784 genes
# with the lowest summed rank.
table_goi <- four_srs %>%
  group_by(srs) %>%
  mutate(rank = min_rank(desc(sd))) %>%
  select(-max, -sd, -log_max) %>%
  pivot_wider(names_from = srs, values_from = rank) %>%
  rowwise() %>%
  # Var = number of samples in which the gene has no rank.
  mutate(Var = sum(is.na(c_across(starts_with("SRS"))))) %>%
  ungroup() %>%
  mutate(across(starts_with("SRS"), ~ replace_na(.x, 1024))) %>%
  rowwise(genes) %>%
  mutate(overall_rank = sum(c_across(starts_with("SRS")), na.rm = TRUE)) %>%
  ungroup() %>%
  filter(Var < 1) %>%
  arrange(overall_rank) %>%
  slice(1:784)
```
```{r}
# Gene names of the consensus table, in the row order of table_goi.
goi_list <- table_goi %>%
  pluck("genes")
goi_list
```
```{r}
# Extract the genes-of-interest x cells-of-interest sub-matrix of one cached
# sample.
#
# SRS:               sample accession; the Seurat object is read from
#                    "<PATH>/<SRS>.rds".
# PATH:              directory holding the cached .rds files.
# cells_of_interest: values of CellTypes to keep.
# genes:             gene names to keep; defaults to the global goi_list.
#
# Returns the assay matrix restricted to the requested genes (rows) and to
# the cells of the requested types (columns). Genes that are not present in
# the sample are skipped with a warning.
get_count_matrix <- function(
    SRS,
    PATH,
    cells_of_interest = c(
      "Fibroblasts", "Goblet cells", "Epithelial cells", "Cholangiocytes"
    ),
    genes = goi_list) {
  seurat_data <- readRDS(file.path(PATH, paste0(SRS, ".rds")))

  # Barcodes of the cells annotated with one of the requested cell types.
  cells_of_interest_barcodes <- seurat_data %>%
    filter(CellTypes %in% cells_of_interest) %>%
    pull(.cell)

  count_matrix <- GetAssayData(object = seurat_data)
  locations_of_barcodes <- match(
    cells_of_interest_barcodes,
    colnames(count_matrix)
  )

  locations_of_genes <- match(genes, rownames(count_matrix))
  is_missing <- is.na(locations_of_genes)
  if (any(is_missing)) {
    warning(
      sum(is_missing), " gene(s) not found in ", SRS, " and skipped: ",
      paste(genes[is_missing], collapse = ", "),
      call. = FALSE
    )
    locations_of_genes <- locations_of_genes[!is_missing]
  }

  # drop = FALSE keeps a matrix even for a single gene or a single cell.
  count_matrix[locations_of_genes, locations_of_barcodes, drop = FALSE]
}
```
```{r}
# Genes-of-interest x cells-of-interest matrix for sample SRS3296612, read
# from the current directory.
get_count_matrix("SRS3296612", ".")
```
Buradan sonra matrix elde ediliyor
```{r}
# Locate the genes of interest among the row names of count_matrix.
all_genes <- rownames(count_matrix)
all_genes
locations_of_genes <- match(goi_list,all_genes)
# Rows of the genes of interest, all columns.
count_matrix[locations_of_genes,]
# Genes of interest that have no matching row name in count_matrix.
goi_list[is.na(locations_of_genes)]
```
```{r}
# Gene names from every per-sample top-gene table; a gene selected in several
# samples appears once per sample.
gene_of_interest <- four_srs %>%
  pull(genes)
gene_of_interest
```
```{r}
# Save the gene vector to "gene_of_interest.rds" in the working directory.
saveRDS(gene_of_interest, "gene_of_interest.rds")
```
# update the function
so that the column names are the cell type names
```r=
# Toy example: relabel the columns of a matrix (named by barcode) with the
# cell type of each barcode, when the annotation vectors are in another order.
test <- matrix(1:10, 2, 5)
colnames(test) <- c("ABC", "ADE", "CFG", "KLM", "PRS")
test
#>      ABC ADE CFG KLM PRS
#> [1,]   1   3   5   7   9
#> [2,]   2   4   6   8  10

# Annotation: barcodes[i] has cell type cells[i].
barcodes <- c("ABC", "KLM", "CFG", "PRS", "ADE")
cells <- c("fibroblast", "fibroblast", "epitel", "epitel", "fibroblast")

# For every matrix column, find the position of its barcode in `barcodes`.
# match(barcodes, colnames(test)) would give the inverse permutation and
# attach the wrong cell type to ADE and PRS.
the_order <- match(colnames(test), barcodes)
the_order
#> [1] 1 5 3 2 4
cells[the_order]
#> [1] "fibroblast" "fibroblast" "epitel"     "fibroblast" "epitel"

colnames(test) <- cells[the_order]
test
#>      fibroblast fibroblast epitel fibroblast epitel
#> [1,]          1          3      5          7      9
#> [2,]          2          4      6          8     10
```