# R study (Scatter plot)

## Scatter plot drawing

::: info
This is the protocol for creating scatter plots that compare genome size, gene content and repeat content. The plots were produced in RStudio using the statements described below.
:::

---

## RStudio statements

### Load the required libraries

```r=
library(readxl)
library(ggplot2)
library(ggthemes)
library(forcats)
library(dplyr)
library(ggrepel)
library(gridExtra)
```

### Turn off scientific notation (e.g. 1e+40)

```r=
options(scipen = 999)
```

### Load file

```r=
# NOTE(review): the plots below read Merged_genome, Subgenome, LTR and
# LTRAnnotation from this single data frame, while the example tables under
# "Input File" list Merged_genome and Subgenome in separate tables and show no
# LTR columns. Confirm the layout of the workbook before running.
df <- read_excel("dfdf.xlsx")
```

### Add new columns

```r=
# Genome_size_Mbp rescales Genome_size by 1e6 for the x axis of every plot.
# NOTE(review): in the example tables `Assembly` contains no underscore (the
# underscore-delimited names are in `FullName`), so both gsub() calls return
# `Assembly` unchanged; `subgenome` and `species` are also not used by any
# plot in this protocol. Confirm which column was intended.
dfmutate <- df %>%
  mutate(
    subgenome = gsub("^.*._", "", Assembly),
    species = gsub("_.*", "", Assembly),
    Genome_size_Mbp = Genome_size / 1000000
  )
```

### Shared plot settings

All six plots use the same axis formatting, legend guide and theme, and each plot family (merged genome / subgenome) uses the same manual scales, so they are defined once here. Adding a list to a ggplot adds each element of the list in turn.

```r=
# Thousands separators on both axes.
comma_axes <- list(
  scale_x_continuous(labels = scales::comma),
  scale_y_continuous(labels = scales::comma)
)

# Enlarged legend keys and a horizontal legend below the panel.
legend_layout <- list(
  guides(fill = guide_legend(override.aes = list(size = 10))),
  theme(
    legend.text = element_text(size = 15),
    legend.position = "bottom",
    legend.direction = "horizontal",
    axis.line = element_line(color = "black")
  )
)

# Manual scales for the merged-genome plots (three groups, one grey fill).
merged_scales <- list(
  scale_shape_manual(values = c(22, 23, 24)),
  scale_fill_manual(values = c("#414141", "#414141", "#414141")),
  scale_size(range = c(2, 10), limits = c(5, 20))
)

# Manual scales for the subgenome plots (nine groups).
subgenome_scales <- list(
  scale_shape_manual(values = c(22, 22, 23, 23, 24, 24, 21, 21, 21)),
  scale_fill_manual(
    values = c(
      "#197DB5", "#42A63C", "#197DB5",
      "#E31617", "#42A63C", "#E31617",
      "#197DB5", "#42A63C", "#E31617"
    )
  ),
  scale_size(range = c(2, 10), limits = c(5, 30))
)

# Group factors ordered by the `Order` column, so that the manual shape and
# fill values above are assigned to the groups in that order.
merged_df <- dfmutate %>%
  mutate(Merged_genome = fct_reorder(Merged_genome, Order))

subgenome_df <- dfmutate %>%
  mutate(Subgenome = fct_reorder(Subgenome, Order))
```

### MergedGenome_size X Gene_content plot

```r=
p_merged_gene <- ggplot(
  merged_df,
  aes(x = Genome_size_Mbp, y = Gene_content, fill = Merged_genome, size = LAI)
) +
  comma_axes +
  geom_point(alpha = 0.7, aes(shape = Merged_genome)) +
  geom_text_repel(
    aes(label = GeneContentAnnotation),
    size = 4,
    segment.linetype = 1,
    segment.curvature = 0,
    point.padding = 1,
    nudge_x = 30,
    nudge_y = -1800
  ) +
  merged_scales +
  legend_layout +
  labs(
    x = "Genome size (Mbp)",
    y = "Gene content",
    size = "LTR Assembly Index (LAI)",
    fill = "Merged_genome"
  )
```

### MergedGenome_size X Repeat plot

```r=
p_merged_repeat <- ggplot(
  merged_df,
  aes(x = Genome_size_Mbp, y = Repeat, fill = Merged_genome, size = LAI)
) +
  comma_axes +
  geom_point(alpha = 0.7, aes(shape = Merged_genome)) +
  geom_text_repel(
    aes(label = RepeatAnnotation),
    size = 4,
    segment.linetype = 1,
    segment.curvature = 0,
    point.padding = 1.5,
    nudge_x = 35,
    nudge_y = -0.01
  ) +
  merged_scales +
  legend_layout +
  labs(
    x = "Genome size (Mbp)",
    y = "Repeat",
    size = "LTR Assembly Index (LAI)",
    fill = "Merged_genome"
  )
```

### MergedGenome_size X LTR plot

```r=
p_merged_ltr <- ggplot(
  merged_df,
  aes(x = Genome_size_Mbp, y = LTR, fill = Merged_genome, size = LAI)
) +
  comma_axes +
  geom_point(alpha = 0.7, aes(shape = Merged_genome)) +
  geom_text_repel(
    aes(label = LTRAnnotation),
    size = 4,
    segment.linetype = 1,
    segment.curvature = 0,
    point.padding = 1,
    nudge_x = 50,
    nudge_y = -0.3
  ) +
  merged_scales +
  legend_layout +
  labs(
    x = "Genome size (Mbp)",
    y = "LTR",
    size = "LTR Assembly Index (LAI)",
    fill = "Merged_genome"
  )
```

### Subgenome_size X Gene_content plot

```r=
p_sub_gene <- ggplot(
  subgenome_df,
  aes(x = Genome_size_Mbp, y = Gene_content, fill = Subgenome, size = LAI)
) +
  comma_axes +
  geom_point(alpha = 0.7, aes(shape = Subgenome)) +
  geom_text_repel(
    aes(label = GeneContentAnnotation),
    size = 4,
    segment.linetype = 1,
    segment.curvature = 0,
    point.padding = 1.5,
    nudge_x = 25,
    nudge_y = 1500
  ) +
  subgenome_scales +
  legend_layout +
  labs(
    x = "Genome size (Mbp)",
    y = "Gene content",
    size = "LTR Assembly Index (LAI)",
    fill = "Subgenome"
  )
```

### Subgenome_size X Repeat plot

```r=
p_sub_repeat <- ggplot(
  subgenome_df,
  aes(x = Genome_size_Mbp, y = Repeat, fill = Subgenome, size = LAI)
) +
  comma_axes +
  geom_point(alpha = 0.7, aes(shape = Subgenome)) +
  geom_text_repel(
    aes(label = RepeatAnnotation),
    size = 4,
    segment.linetype = 1,
    segment.curvature = 0,
    point.padding = 1.5,
    nudge_x = 35,
    nudge_y = -0.015,
    max.overlaps = Inf
  ) +
  subgenome_scales +
  legend_layout +
  labs(
    x = "Genome size (Mbp)",
    y = "Repeat",
    size = "LTR Assembly Index (LAI)",
    fill = "Subgenome"
  )
```

### Subgenome_size X LTR plot

```r=
p_sub_ltr <- ggplot(
  subgenome_df,
  aes(x = Genome_size_Mbp, y = LTR, fill = Subgenome, size = LAI)
) +
  comma_axes +
  geom_point(alpha = 0.7, aes(shape = Subgenome)) +
  geom_text_repel(
    aes(label = LTRAnnotation),
    size = 4,
    segment.linetype = 1,
    segment.curvature = 0,
    point.padding = 1.5,
    nudge_x = 35,
    nudge_y = -0.015,
    max.overlaps = Inf
  ) +
  subgenome_scales +
  legend_layout +
  labs(
    x = "Genome size (Mbp)",
    y = "LTR",
    size = "LTR Assembly Index (LAI)",
    fill = "Subgenome"
  )
```

### Arrange the plots in a grid

```r=
# Merged-genome plots fill the first row, subgenome plots the second.
scatter_grid <- grid.arrange(
  p_merged_gene, p_merged_repeat, p_merged_ltr,
  p_sub_gene, p_sub_repeat, p_sub_ltr,
  ncol = 3
)
```

### Save as PDF

```r=
ggsave("Scatter.pdf", scatter_grid, width = 24, height = 16)
```

---

### Reference

- https://ggplot2.tidyverse.org/reference/scale_continuous.html
- https://www.datanovia.com/en/blog/ggplot-colors-best-tricks-you-will-love/
- https://www.r-graph-gallery.com/320-the-basis-of-bubble-plot.html
- https://statisticsglobe.com/control-size-of-ggplot2-legend-items-in-r
- https://t-redactyl.io/blog/2016/02/creating-plots-in-r-using-ggplot2-part-6-weighted-scatterplots.html
- https://ggplot2.tidyverse.org/reference/geom_text.html
- https://ggrepel.slowkow.com/articles/examples.html
- http://www.sthda.com/english/wiki/ggplot2-point-shapes

---

### Input File

#### Mergedgenome

| Order | Taxonomy | Assembly | FullName | ScientificName | GeneContentAnnotation | RepeatAnnotation | Merged_genome | Genome_size | Gene_content | Repeat | LAI |
|-------|-------------|----------------|----------------------------|-------------------------|-------------------------|-------------------------|---------------|-------------|--------------|--------|-------------|
| 1 | B. juncea | T8466 | B. juncea_T8466_AB | B. juncea_T8466 | B. juncea_T8466 | B. juncea_T8466 | AB | 752095125 | 79644 | 38.26 | 6.694542154 |
| 1 | B. juncea | Varuna | B. juncea_Varuna_AB | B. juncea_Varuna | B. juncea_Varuna | B. juncea_Varuna | AB | 811335218 | 101959 | 47.95 | 17.95236206 |
| 2 | B. napus | Darmor-bzh V5 | B. napus_Darmor-bzh V5_AC | B. napus_Darmor-bzh V5 | B. napus_Darmor-bzh V5 | B. napus_Darmor-bzh V5 | AC | 738348646 | 101040 | 42.48 | 8.717496214 |
| 2 | B. napus | No2127 | B. napus_No2127_AC | B. napus_No2127 | | | AC | 1011356825 | 95385 | 44.1 | 11.55771973 |
| 2 | B. napus | Zs11 | B. napus_Zs11_AC | B. napus_Zs11 | | B. napus_Zs11 | AC | 1008157456 | 100919 | 54.76 | 11.72267492 |
| 2 | B. napus | GanganF73 | B. napus_GanganF73_AC | B. napus_GanganF73 | | | AC | 1033284646 | 96843 | 40.97 | 11.91534388 |
| 2 | B. napus | Westar | B. napus_Westar_AC | B. napus_Westar | | | AC | 1007201016 | 97514 | 44.11 | 12.44822493 |
| 2 | B. napus | Zheyou73 | B. napus_Zheyou73_AC | B. napus_Zheyou73 | | | AC | 1015024007 | 96209 | 42.78 | 12.90562522 |
| 2 | B. napus | Shengli3 | B. napus_Shengli3_AC | B. napus_Shengli3 | | | AC | 1001432891 | 94586 | 44.61 | 13.04633217 |
| 2 | B. napus | QuintaA | B. napus_QuintaA_AC | B. napus_QuintaA | | | AC | 1003196373 | 95492 | 43.61 | 13.12935172 |
| 2 | B. napus | Tapidor3 | B. napus_Tapidor3_AC | B. napus_Tapidor3 | | | AC | 1013525683 | 96117 | 43.4 | 13.25117018 |
| 2 | B. napus | Express617 | B. napus_Express617_AC | B. napus_Express617 | | B. napus_Express617 | AC | 924304952 | 99481 | 52.68 | 13.92687317 |
| 2 | B. napus | Darmor-bzh V10 | B. napus_Darmor-bzh V10_AC | B. napus_Darmor-bzh V10 | B. napus_Darmor-bzh V10 | B. napus_Darmor-bzh V10 | AC | 897803932 | 108190 | 51.46 | 16.02264393 |
| 3 | B. carinata | Zd-1 | B. carinata_Zd-1_BC | B. carinata_Zd-1 | B. carinata_Zd-1 | B. carinata_Zd-1 | BC | 1086791901 | 97149 | 52.27 | 9.56776514 |
| 3 | B. carinata | Gomenzer | B. carinata_Gomenzer_BC | B. carinata_Gomenzer | B. carinata_Gomenzer | B. carinata_Gomenzer | BC | 1309763713 | 119303 | 50.66 | 12.22906499 |

#### Subgenome + Diploid

| Order | Taxonomy | Assembly | FullName | ScientificName | GeneContentAnnotation | RepeatAnnotation | Subgenome | Genome_size | Gene_content | Repeat | LAI |
|-------|-------------|----------------|---------------------------|-------------------------|-------------------------|-------------------------|-----------|-------------|--------------|--------|-------------|
| 1 | B. juncea | T8466 | B. juncea_T8466_B | B. juncea_T8466 | B. juncea_T8466 | B. juncea_T8466 | Bj | 334544090 | 35686 | 33.46 | 7.161416474 |
| 1 | B. juncea | T8466 | B. juncea_T8466_A | B. juncea_T8466 | B. juncea_T8466 | B. juncea_T8466 | Aj | 303520244 | 36454 | 28.72 | 8.099664751 |
| 1 | B. juncea | Varuna | B. juncea_Varuna_B | B. juncea_Varuna | B. juncea_Varuna | B. juncea_Varuna | Bj | 498619819 | 55578 | 50.69 | 16.88091919 |
| 1 | B. juncea | Varuna | B. juncea_Varuna_A | B. juncea_Varuna | B. juncea_Varuna | B. juncea_Varuna | Aj | 312715399 | 46381 | 35.75 | 22.84860488 |
| 2 | B. napus | Darmor-bzh V5 | B. napus_Darmor-bzh V5_C | B. napus_Darmor-bzh V5 | B. napus_Darmor-bzh V5 | B. napus_Darmor-bzh V5 | Cn | 449982337 | 56055 | 43.42 | 9.454352941 |
| 2 | B. napus | No2127 | B. napus_No2127_C | B. napus_No2127 | | | Cn | 538575287 | 47511 | 50.94 | 13.06307737 |
| 2 | B. napus | Darmor-bzh V5 | B. napus_Darmor-bzh V5_A | B. napus_Darmor-bzh V5 | B. napus_Darmor-bzh V5 | B. napus_Darmor-bzh V5 | An | 280765941 | 44452 | 29.72 | 14.10367751 |
| 2 | B. napus | Zs11 | B. napus_Zs11_C | B. napus_Zs11 | | B. napus_Zs11 | Cn | 567580875 | 52562 | 50.6 | 15.35877405 |
| 2 | B. napus | Westar | B. napus_Westar_C | B. napus_Westar | | | Cn | 549609042 | 49517 | 49.06 | 15.42458736 |
| 2 | B. napus | Shengli3 | B. napus_Shengli3_C | B. napus_Shengli3 | | | Cn | 533014753 | 46374 | 50.84 | 15.94125587 |
| 2 | B. napus | GanganF73 | B. napus_GanganF73_C | B. napus_GanganF73 | | | Cn | 540277955 | 48502 | 48.78 | 16.10498551 |
| 2 | B. napus | Tapidor3 | B. napus_Tapidor3_C | B. napus_Tapidor3 | | | Cn | 544423911 | 48522 | 48.73 | 16.45039125 |
| 2 | B. napus | Zheyou73 | B. napus_Zheyou73_C | B. napus_Zheyou73 | | | Cn | 534371242 | 47289 | 49.36 | 16.73125952 |
| 2 | B. napus | GanganF73 | B. napus_GanganF73_A | B. napus_GanganF73 | | | An | 380403960 | 41227 | 42 | 17.45259696 |
| 2 | B. napus | QuintaA | B. napus_QuintaA_C | B. napus_QuintaA | | | Cn | 546888897 | 49096 | 49.23 | 17.49412371 |
| 2 | B. napus | Westar | B. napus_Westar_A | B. napus_Westar | | | An | 384212655 | 41968 | 45.46 | 17.71108333 |
| 2 | B. napus | Zs11 | B. napus_Zs11_A | B. napus_Zs11 | | B. napus_Zs11 | An | 390985729 | 44359 | 43.98 | 18.42861614 |
| 2 | B. napus | No2127 | B. napus_No2127_A | B. napus_No2127 | | | An | 370435621 | 39708 | 46.34 | 18.79608167 |
| 2 | B. napus | Express617 | B. napus_Express617_C | B. napus_Express617 | | B. napus_Express617 | Cn | 468758094 | 51243 | 45.61 | 18.84392473 |
| 2 | B. napus | Darmor-bzh V10 | B. napus_Darmor-bzh V10_C | B. napus_Darmor-bzh V10 | B. napus_Darmor-bzh V10 | B. napus_Darmor-bzh V10 | Cn | 515379367 | 59692 | 48.18 | 19.85571515 |
| 2 | B. napus | Shengli3 | B. napus_Shengli3_A | B. napus_Shengli3 | | | An | 374319997 | 40600 | 46.94 | 20.16654278 |
| 2 | B. napus | Tapidor3 | B. napus_Tapidor3_A | B. napus_Tapidor3 | | | An | 375413050 | 39924 | 46.49 | 20.65422451 |
| 2 | B. napus | Darmor-bzh V10 | B. napus_Darmor-bzh V10_A | B. napus_Darmor-bzh V10 | B. napus_Darmor-bzh V10 | B. napus_Darmor-bzh V10 | An | 334020585 | 47193 | 39.07 | 20.97099251 |
| 2 | B. napus | QuintaA | B. napus_QuintaA_A | B. napus_QuintaA | | | An | 372012156 | 40494 | 45.23 | 20.98849351 |
| 2 | B. napus | Zheyou73 | B. napus_Zheyou73_A | B. napus_Zheyou73 | | | An | 371055365 | 41215 | 45.93 | 21.23190269 |
| 2 | B. napus | Express617 | B. napus_Express617_A | B. napus_Express617 | | B. napus_Express617 | An | 295456129 | 45189 | 34.06 | 28.52296337 |
| 3 | B. carinata | Zd-1 | B. carinata_Zd-1_B | B. carinata_Zd-1 | B. carinata_Zd-1 | B. carinata_Zd-1 | Bc | 462305350 | 37470 | 52.56 | 9.40860339 |
| 3 | B. carinata | Zd-1 | B. carinata_Zd-1_C | B. carinata_Zd-1 | B. carinata_Zd-1 | B. carinata_Zd-1 | Cc | 556572262 | 52279 | 47.64 | 9.69980315 |
| 3 | B. carinata | Gomenzer | B. carinata_Gomenzer_B | B. carinata_Gomenzer | B. carinata_Gomenzer | B. carinata_Gomenzer | Bc | 686177361 | 64787 | 47.26 | 11.78271876 |
| 3 | B. carinata | Gomenzer | B. carinata_Gomenzer_C | B. carinata_Gomenzer | B. carinata_Gomenzer | B. carinata_Gomenzer | Cc | 623586352 | 54516 | 48.67 | 12.70550949 |
| 4 | B. rapa | FPsc | B. rapa_FPsc_AA | B. rapa_FPsc | | | Ar | 250161472 | 40492 | 25.44 | 18.90215463 |
| 4 | B. rapa | Chiifu | B. rapa_Chiifu_AA | B. rapa_Chiifu | | | Ar | 351063200 | 46250 | 40.82 | 19.20918142 |
| 4 | B. rapa | Z1 | B. rapa_Z1_AA | B. rapa_Z1 | | | Ar | 368960336 | 46721 | 42.11 | 19.41933996 |
| 5 | B. nigra | NI100 | B. nigra_NI100_BB | B. nigra_NI100 | | | Bn | 505987728 | 59852 | 50.76 | 14.36197726 |
| 5 | B. nigra | C2 | B. nigra_C2_BB | B. nigra_C2 | | | Bn | 536229160 | 67021 | 46.17 | 14.71554826 |
| 5 | B. nigra | YZ12151 | B. nigra_YZ12151_BB | B. nigra_YZ12151 | | | Bn | 277308113 | 47953 | 28.19 | 16.13843035 |
| 6 | B. oleracea | BOL | B. oleracea_BOL_CC | B. oleracea_BOL | | | Co | 445615242 | 59225 | 39.45 | 5.298091494 |
| 6 | B. oleracea | Capitata | B. oleracea_Capitata_CC | B. oleracea_Capitata | | | Co | 359360236 | 35400 | 40.94 | 7.577696769 |
| 6 | B. oleracea | D134 | B. oleracea_D134_CC | B. oleracea_D134 | | | Co | 574885288 | 43868 | 42.96 | 13.45541246 |
| 6 | B. oleracea | JZS | B. oleracea_JZS_CC | B. oleracea_JZS | | | Co | 561011681 | 59064 | 49.85 | 16.10332172 |
| 6 | B. oleracea | HDEM | B. oleracea_HDEM_CC | B. oleracea_HDEM | | | Co | 545017856 | 61279 | 50.46 | 18.04581851 |