# PanglaoDB ve TidySeurat

Kolon örnekleri seçildi. Kolon örneklerinden Fibroblast, Goblet cells, Epithelial cells, Cholangiocytes hücreleri seçildi.

```{r}
library(rPanglaoDB)
library(tidyverse)
library(dplyr)
library(Seurat)
library(SeuratObject)
```

```{r}
# Fetch the sample list once and reuse it in the chunks below instead of
# calling getSampleList() again in every chunk.
sample_list <- getSampleList()

# Human 10x chromium samples, with the cell count as an integer.
human_10x <- sample_list %>%
  filter(Species == "Homo sapiens", Protocol == "10x chromium") %>%
  mutate(Cells = as.integer(Cells))

# Total number of cells per tissue.
human_10x %>%
  group_by(Tissue) %>%
  summarize(toplam = sum(Cells))
```

```{r}
human_10x %>%
  filter(Tissue == "Liver")
```

```{r}
liver_samples <- human_10x %>%
  filter(Tissue == "Liver") %>%
  pull(SRS)
```

```{r}
# Count the cells of each annotated cell type in one sample.
# SAMPLE: a single SRS accession, passed to getSamples(srs = ...).
# Returns one row per value of CellTypes with its count in column n.
get_cell_count <- function(SAMPLE) {
  getSamples(srs = SAMPLE) %>%
    tidyseurat::count(CellTypes)
}

liver_samples %>%
  purrr::set_names() %>%
  purrr::map_df(get_cell_count, .id = "srs")
```

```{r}
colon_samples <- human_10x %>%
  filter(Tissue == "Colon") %>%
  pull(SRS)
```

```{r}
# get_cell_count() is defined once in the liver chunk above; the identical
# second definition that used to live here was removed.
colon_counts <- colon_samples %>%
  purrr::set_names() %>%
  purrr::map_df(get_cell_count, .id = "srs")
```

```{r}
colon_counts %>%
  group_by(CellTypes) %>%
  summarise(toplam = sum(n))
```

```{r}
colon_counts
```

```{r}
deneme <- getSamples("SRA703206")
deneme_2 <- getSamples("SRA728025")
```

```{r}
deneme %>%
  tidyseurat::as_tibble() %>%
  tail()
```

```{r}
colon_samples
```

```{r eval=FALSE}
# Scratch note only: join_features() has no input object here, so running
# this chunk would stop the notebook. See the next chunk for the real call.
tidyseurat::join_features(all = TRUE)
```

```{r}
deneme_3 <- getSamples(srs = "SRS3454423")
deneme_3_features <- deneme_3 %>%
  tidyseurat::join_features(all = TRUE)
# https://github.com/satijalab/seurat/issues/3715
```

# Working code 2022-05-26

```{r}
library(rPanglaoDB)
library(tidyverse)
library(dplyr)
library(Seurat)
library(SeuratObject)
```

```{r}
colon_samples <- getSampleList() %>%
  filter(Species == "Homo sapiens", Tissue == "Colon") %>%
  filter(Protocol == "10x chromium")
colon_samples
```

```{r}
colon_srs <- colon_samples %>%
  distinct(SRS) %>%
  pull(SRS)
colon_srs
```

```{r}
cells_of_interest <- c("Fibroblasts", "Goblet cells", "Epithelial cells", "Cholangiocytes")
```

| Cell type | Count |
|-----------|-------|
| Fibroblasts | 6136 |
| Goblet cells| 3201 |
| Epithelial cells | 5982 |
| Cholangiocytes | 11,541 |

```{r}
# Count the cells of each annotated cell type in one sample.
# SAMPLE: a single SRS accession, passed to getSamples(srs = ...).
get_cell_count <- function(SAMPLE) {
  getSamples(srs = SAMPLE) %>%
    tidyseurat::count(CellTypes)
}

# Iterate over the SRS accessions. In this section `colon_samples` is the
# whole sample table, so mapping over it would walk its columns instead.
colon_counts <- colon_srs %>%
  purrr::set_names() %>%
  purrr::map_df(get_cell_count, .id = "srs")
```

```{r eval=FALSE}
# Cache every colon sample as <SRS>.rds in the working directory.
colon_srs %>%
  set_names() %>%
  map(~ {
    getSamples(srs = .x) %>%
      saveRDS(paste0(.x, ".rds"))
  })
```

```{r}
# Count cells per cell type from the cached <SRS>.rds file of one sample.
get_cell_type <- function(SRS) {
  filename <- paste0(SRS, ".rds")
  readRDS(filename) %>%
    tidyseurat::as_tibble() %>%
    count(CellTypes)
}

# Cell types with more than 3000 cells summed over all colon samples.
cells_of_interest2 <- colon_srs %>%
  map_df(get_cell_type) %>%
  group_by(CellTypes) %>%
  summarize(total = sum(n)) %>%
  arrange(-total) %>%
  filter(total > 3000) %>%
  drop_na() %>%
  pull(CellTypes)
```

```{r}
# Cell-level table of one cached sample, restricted to the cell types listed
# in the global `cells_of_interest2`.
extract_data <- function(SRS) {
  readRDS(paste0(SRS, ".rds")) %>%
    tidyseurat::as_tibble() %>%
    # mutate(CellTypes = str_trim(CellTypes)) %>%
    filter(CellTypes %in% cells_of_interest2)
  # filter(CellTypes == "Epithelial cells")
}

cell_of_interest_count <- colon_srs %>%
  map_df(~ extract_data(.x)) %>%
  count(CellTypes)
cell_of_interest_count
```

```{r}
# Barcodes of the cells in SRS3296611 whose type is among the counted types.
cells_of_interest_barcodes <- readRDS("SRS3296611.rds") %>%
  tidyseurat::inner_join(cell_of_interest_count, by = "CellTypes") %>%
  pull(.cell)
```

```{r}
readRDS("SRS3296611.rds") %>%
  tidyseurat::count(CellTypes) %>%
  tidyseurat::inner_join(cell_of_interest_count, by = "CellTypes")
```

```{r}
# slot="counts" low numbers
count_matrix <- GetAssayData(object = readRDS("SRS3296611.rds"))
```

```{r}
# deneme <- colnames(count_matrix)[1:10]
count_matrix[1:5, 1:5]
```

```{r}
all_cells <- colnames(count_matrix)
locations_of_barcodes <- match(cells_of_interest_barcodes, all_cells)
# Drop unmatched barcodes before indexing. The NA-free vector used to be
# computed with na.omit() and then thrown away.
locations_of_barcodes <- locations_of_barcodes[!is.na(locations_of_barcodes)]
SRS3296611_coi_extract <- count_matrix[, locations_of_barcodes]
saveRDS(SRS3296611_coi_extract, "SRS3296611_coi_extract.rds")

# Keep the gene names so the two summaries are joined per gene rather than
# on the numeric values themselves.
df1 <- apply(SRS3296611_coi_extract, 1, max) %>%
  as_tibble(rownames = "genes") %>%
  rename(max = value)
df2 <- apply(SRS3296611_coi_extract, 1, sd) %>%
  as_tibble(rownames = "genes") %>%
  rename(sd = value)
inner_join(df1, df2, by = "genes")
# then sort by max and sd, slice(1:1024)
```

**01.06.2022**

```{r}
# NOTE(review): SRS3296612_coi_extract is not created anywhere in this
# notebook; presumably it was built like SRS3296611_coi_extract above, from
# SRS3296612.rds - confirm before running this chunk.
df5 <- apply(SRS3296612_coi_extract, 1, max) %>%
  as_tibble(rownames = "genes")
df4 <- apply(SRS3296612_coi_extract, 1, sd) %>%
  as_tibble(rownames = "genes")

df4 %>%
  arrange(-value) %>%
  slice(1:2048)

inner_join(df5, df4, by = "genes") %>%
  mutate(value.x = log(value.x)) %>%
  arrange(-value.y) %>%
  slice(1:2048) %>%
  ggplot(aes(value.x)) +
  geom_histogram()
```

```r=
library(tidyverse)
library(tidyseurat)

#' Most variable genes among the cell types of interest in one sample.
#'
#' Reads `<PATH>/<SRS>.rds`, keeps the cells whose `CellTypes` is one of
#' `cells_of_interest`, computes each gene's maximum and standard deviation
#' over those cells, keeps the `n_top` genes with the largest standard
#' deviation, saves a histogram of `log(max)` as `<PATH>/<SRS>.png` and
#' returns the table.
#'
#' @param SRS Sample accession; used as the file stem for input and plot.
#' @param PATH Directory that holds `<SRS>.rds` and receives `<SRS>.png`.
#' @param cells_of_interest Cell type labels to keep.
#' @param n_top Number of genes to keep, ordered by decreasing sd.
#' @return A tibble with columns `genes`, `max`, `sd` and `log_max`.
get_gene_range_per_srs <- function(SRS, PATH,
                                   cells_of_interest = c(
                                     "Fibroblasts", "Goblet cells",
                                     "Epithelial cells", "Cholangiocytes"
                                   ),
                                   n_top = 2048) {
  seurat_data <- readRDS(file.path(PATH, paste0(SRS, ".rds")))

  # Select the barcodes directly; the earlier count() + inner_join() detour
  # picked the same cells.
  cells_of_interest_barcodes <- seurat_data %>%
    filter(CellTypes %in% cells_of_interest) %>%
    pull(.cell)
  if (length(cells_of_interest_barcodes) == 0) {
    stop("No cells of interest found in sample ", SRS, call. = FALSE)
  }

  count_matrix <- GetAssayData(object = seurat_data)
  locations_of_barcodes <- match(cells_of_interest_barcodes, colnames(count_matrix))
  # drop = FALSE keeps a matrix even when a single cell is selected, which
  # apply() below requires.
  coi_extract <- count_matrix[, locations_of_barcodes, drop = FALSE]

  df1 <- apply(coi_extract, 1, max) %>%
    as_tibble(rownames = "genes") %>%
    rename(max = value)
  df2 <- apply(coi_extract, 1, sd) %>%
    as_tibble(rownames = "genes") %>%
    rename(sd = value)

  data <- inner_join(df1, df2, by = "genes") %>%
    mutate(log_max = log(max)) %>%
    arrange(-sd) %>%
    slice_head(n = n_top)

  plot <- data %>%
    ggplot(aes(log_max)) +
    geom_histogram()
  ggsave(file.path(PATH, paste0(SRS, ".png")), plot)

  data
}
```

Colon SRS Listesi

"SRS3296611" , "SRS3296612" , "SRS3454422", "SRS3454423" ,"SRS3454424", "SRS3454425", "SRS3454426", "SRS3454427" "SRS3454428", "SRS3454430"

```r=
srs_list <- c("SRS3296612", "SRS3296611", "SRS3454430", "SRS3454428")
# One table of top-variance genes per sample, stacked with the accession in
# column `srs`.
four_srs <- srs_list %>%
  set_names() %>%
  map_df(~ get_gene_range_per_srs(.x, "."), .id = "srs")
```

Tek bir SRS için gen dataframe;

```{r}
#srs_list<- c("SRS3454426")
#four_srs <- srs_list %>% set_names() %>% map_df(~ get_gene_range_per_srs(.x, "."), .id = "srs" )
```

```r=
# Rank genes by sd within each sample (1 = most variable), then sum the
# ranks across samples. A gene missing from any sample gets an NA sum here
# and therefore sorts last.
four_srs %>%
  group_by(srs) %>%
  mutate(rank = min_rank(desc(sd))) %>%
  ungroup() %>%
  select(-max, -sd, -log_max) %>%
  pivot_wider(names_from = srs, values_from = rank) %>%
  rowwise(genes) %>%
  mutate(overall_rank = sum(c_across(starts_with("SRS")))) %>%
  arrange(overall_rank)
```

Soru: "SRS3296611_coi_extract" adlı matrikste variation olan satırlar (yani genler) bulacak ilkel (örn. StdDev) veya modern yöntemler ile 1024 gen bulalım.

```{r}
gene_ranks <- four_srs %>%
  group_by(srs) %>%
  mutate(rank = min_rank(desc(sd))) %>%
  ungroup()

# A gene that is absent from a sample's list must rank behind every gene
# that is present in it. The former fixed penalty of 1024 placed missing
# genes ahead of all genes ranked 1025 and worse.
missing_rank <- max(gene_ranks$rank) + 1L

gene_ranks %>%
  select(-max, -sd, -log_max) %>%
  pivot_wider(names_from = srs, values_from = rank) %>%
  mutate(across(starts_with("SRS"), ~ replace_na(.x, missing_rank))) %>%
  rowwise(genes) %>%
  mutate(overall_rank = sum(c_across(starts_with("SRS")))) %>%
  ungroup() %>%
  arrange(overall_rank) %>%
  slice(1:1024)
```

Burada bahsettiğiniz genlerle dataframe halinde oluşturmaya çalıştık.
```{r}
# All genes listed in four_srs (a gene appears once per sample that has it).
# The former data.frame(srs_list) step only recycled srs_list as an extra
# column and did not affect the pulled genes.
gene_of_interest <- four_srs %>%
  pull(genes)
gene_of_interest
```

![](https://i.imgur.com/mMsZwO4.png)

```{r}
# Genes present in every sample's list, ordered by the sum of their
# per-sample sd ranks; the best 784 are kept.
table_goi <- four_srs %>%
  group_by(srs) %>%
  mutate(rank = min_rank(desc(sd))) %>%
  ungroup() %>%
  select(-max, -sd, -log_max) %>%
  pivot_wider(names_from = srs, values_from = rank) %>%
  rowwise() %>%
  # Var = number of samples in which the gene is missing.
  mutate(Var = sum(is.na(c_across(starts_with("SRS"))))) %>%
  ungroup() %>%
  # Only genes found in all samples survive, so no NA rank needs a
  # placeholder value before summing.
  filter(Var < 1) %>%
  rowwise(genes) %>%
  mutate(overall_rank = sum(c_across(starts_with("SRS")))) %>%
  ungroup() %>%
  arrange(overall_rank) %>%
  slice(1:784)
```

```{r}
goi_list <- table_goi %>%
  pull(genes)
goi_list
```

```{r}
#' Expression matrix of the genes of interest for the cells of interest.
#'
#' Reads `<PATH>/<SRS>.rds`, keeps the cells whose `CellTypes` is one of
#' `cells_of_interest`, and returns the rows of the assay matrix that match
#' `genes`. Genes that are not in the sample are reported with a warning
#' and left out instead of being used as NA row indices.
#'
#' @param SRS Sample accession; used as the file stem.
#' @param PATH Directory that holds `<SRS>.rds`.
#' @param cells_of_interest Cell type labels to keep.
#' @param genes Gene names to extract; defaults to the global `goi_list`.
#' @return A genes x cells matrix (always a matrix, even for one row or
#'   column).
get_count_matrix <- function(SRS, PATH,
                             cells_of_interest = c(
                               "Fibroblasts", "Goblet cells",
                               "Epithelial cells", "Cholangiocytes"
                             ),
                             genes = goi_list) {
  seurat_data <- readRDS(file.path(PATH, paste0(SRS, ".rds")))

  cells_of_interest_barcodes <- seurat_data %>%
    filter(CellTypes %in% cells_of_interest) %>%
    pull(.cell)

  count_matrix <- GetAssayData(object = seurat_data)
  locations_of_barcodes <- match(cells_of_interest_barcodes, colnames(count_matrix))
  locations_of_genes <- match(genes, rownames(count_matrix))

  missing_genes <- genes[is.na(locations_of_genes)]
  if (length(missing_genes) > 0) {
    warning(
      "Genes not found in ", SRS, " and dropped: ",
      paste(missing_genes, collapse = ", "),
      call. = FALSE
    )
  }

  count_matrix[
    locations_of_genes[!is.na(locations_of_genes)],
    locations_of_barcodes,
    drop = FALSE
  ]
}
```

```{r}
get_count_matrix("SRS3296612", ".")
```

Buradan sonra matrix elde ediliyor

```{r}
all_genes <- rownames(count_matrix)
all_genes
locations_of_genes <- match(goi_list, all_genes)
# Show the genes that are missing from this matrix first, then index with
# the matched rows only so NA positions are never used as row indices.
goi_list[is.na(locations_of_genes)]
count_matrix[locations_of_genes[!is.na(locations_of_genes)], ]
```

```{r}
gene_of_interest <- four_srs %>%
  pull(genes)
gene_of_interest
```

```{r}
saveRDS(gene_of_interest, "gene_of_interest.rds")
```

# update the function so that colnames are cell name

```r=
test <- matrix(1:10, 2,5)
colnames(test) <- c("ABC","ADE","CFG","KLM","PRS")
$ test
     ABC ADE CFG KLM PRS
[1,]   1   3   5   7   9
[2,]   2   4   6   8  10

barcodes <- c("ABC","KLM","CFG","PRS","ADE")
cells <- c("fibroblast", "fibroblast","epitel","epitel","fibroblast")
$> barcodes
[1] "ABC" "KLM" "CFG" "PRS" "ADE"
$> cells
[1] "fibroblast" "fibroblast" "epitel"     "epitel"     "fibroblast"

# For every column of `test`, find where its barcode sits in `barcodes`;
# that position indexes the matching entry of `cells`.
# (match(barcodes, colnames(test)) is the inverse lookup and mislabels
# columns ADE and PRS.)
the_order <- match(colnames(test), barcodes)
$> the_order
[1] 1 5 3 2 4
$> cells[the_order]
[1] "fibroblast" "fibroblast" "epitel"     "fibroblast" "epitel"

colnames(test) <- cells[the_order]
$> test
     fibroblast fibroblast epitel fibroblast epitel
[1,]          1          3      5          7      9
[2,]          2          4      6          8     10
```