tags: R Visualization

Visualization

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →

Other Reference:
Rpubs

R Markdown Theme

date: '`r Sys.Date()`'
output: rmdformats::material  # readthedown
date: "`r Sys.Date()`"
output: 
  prettydoc::html_pretty:
    number_sections: yes
    theme: cayman  # hpstr
    toc: true
output:
  html_document:
    highlight: pygments
    theme: flatly
    css: style.css
    toc: true
    toc_depth: 3
    toc_float:
      collapsed: false

Correlation Plot

法一

library(corrplot)

dfcor = cor(df[,c(4:5,7,10:11)])
round(dfcor, 2)

symnum(dfcor, abbr.colnames = F)
corrplot(dfcor, type = "upper", order = "hclust", tl.col = "black", tl.srt = 0)
corrplot.mixed(cor(df[,c(4:5,7,10:11)]), upper="ellipse")

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →

法二

col = colorRampPalette(c("blue", "white", "red"))(20)
heatmap(x = res, col = col, symm = TRUE) # symm表示是否對稱

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →

法三

library(PerformanceAnalytics)
chart.Correlation(mydata, histogram = T, pch=19)

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →

ggplot2 套件

註:Mac畫ggplot無法顯示中文解決方法

ggplot()+
theme(text=element_text(family="蘋方-繁 中黑體", size=12),
      plot.title=element_text(hjust = 0.5))

1. Line Plot

ZM = Z0 %>% mutate(month = format(as.Date(date),format="%m"))
age_ratio = as.data.frame(prop.table(table(ZM$age,ZM$month)))
ZM = left_join(ZM, age_ratio, by=c("age"="Var1", "month"="Var2"))
ggplot(ZM, aes(x = month, y = Freq, group = age, color=age)) + 
  geom_point() +
  geom_line() # 沒有做group會無法把點跟點一起連線

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →

# 熱門男生名字的『比例』
mtop = mbaby %>% group_by(year) %>% top_n(1, number) %>% 
  pull(name) %>% unique()

mbaby %>% group_by(year) %>%
  mutate(fraction = number / sum(number)) %>%
  filter(name %in% mtop) %>%
  ggplot(aes(x=year,y=fraction,col=name)) + 
  geom_line(alpha=0.5) + geom_point(size=0.5) -> g
ggplotly(g)  

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →

  • Label
ggplot(ex1221, aes(Discharge, Area)) +
  geom_point(aes(size=NO3)) + 
  xlab("My x label") +
  ylab("My y label") +
  ggtitle("Weighted Scatterplot of Watershed Area vs. Discharge and Nitrogen Levels (PPM)")
ggplot(ex1221, aes(Discharge, Area)) +
  geom_point(aes(size=NO3)) + 
  labs(size= "Nitrogen",
       x = "My x label",
       y = "My y label",
       title = "Weighted Scatterplot of Watershed Area vs. Discharge and Nitrogen Levels (PPM)")

2. Bar Plot

  • 使用時機:類別間的比例/數量比較
    Image Not Showing Possible Reasons
    • The image file may be corrupted
    • The server hosting the image is unavailable
    • The image path is incorrect
    • The image format is not supported
    Learn More →
  • 看總數及個別數量:左
  • 看比例分佈:中
  • 看個別數量比較:右

geom_bar

df %>% 
  ggplot(aes(as.factor(mon), fill = dept)) + 
  geom_bar() +
  geom_text(stat = "count", aes(label = (..count..)), vjust=0.5, position=position_dodge(width=1), vjust=-0.25, size=4) +
  theme(legend.position='none',
        text = element_text(family="蘋方-繁 中黑體", size=12),
        plot.title = element_text(size=24, hjust = 0.5)) +
  xlab("實習時長(月數)") +
  ylab("人數") +
  facet_wrap(~ dept, scales = "free") +
  ggtitle("實習時長分析-依各系")

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →

df %>% 
  ggplot(aes(as.factor(mon), fill = dept)) + 
  geom_bar() +
  geom_text(stat = "count", aes(label = (..count..)), position=position_stack(vjust=0.5), size=4) +
  theme(text = element_text(family="蘋方-繁 中黑體", size=12),
        plot.title = element_text(size=24, hjust = 0.5)) +
  xlab("月數") +
  ylab("人數") +
  ggtitle("實習時長分析-依月數(加總)")

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →

df %>% ggplot(aes(prov, fill = expr)) +
  geom_bar(position = 'dodge') + 
  scale_x_discrete(labels=c("correct","not_provide","incorrect"))

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →

ggplot(aes(x= artDate,y=sentiment,fill=method)) +
  geom_col(show.legend = FALSE) + # 黏在一起的長條圖(分顏色)
  scale_x_date(labels = date_format("%m/%d")) + # 日期format
  # 依照method切開長條圖成兩張scale一樣的圖, col數選1就是上下, 2為左右 
  facet_wrap(~method, ncol = 2, scales = "fixed")+ 
  geom_text(aes(label=sentiment)) # 標數值

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →

word_count %>% 
  inner_join(LIWC_ch) %>%
  group_by(sentiment) %>%
  top_n(10,wt = count) %>% # wt: Use for ordering. If not specified, defaults to the last variable in the tbl.
  ungroup() %>%  # 先ungroup才可以重新排列
  mutate(word = reorder(word, count)) %>%
  ggplot(aes(word, count, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free_y") +
  labs(y = "Contribution to sentiment",
       x = NULL) +
  theme(text=element_text(family="蘋方-繁 中黑體", size=14))+
  coord_flip() # 直轉橫

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →

geom_col

df %>% 
  group_by(dept) %>% 
  count(comDept) %>% 
  ggplot(aes(comDept, n, fill=dept)) + 
  geom_col() +
  geom_text(aes(y=n, label=n), position=position_dodge(width=1), vjust=-0.25, size=4) +
  theme(legend.position='none',
        text = element_text(family="蘋方-繁 中黑體", size=12),
        plot.title = element_text(size=24, hjust = 0.5)) +
  xlab("實習部門") +
  ylab("人數") +
  facet_wrap(~ dept, scales = "free") +
  ggtitle("各系實習部門分析-依系所")

3. Lineplot(Ratio) + Barplot(Count)

  • 適合用於趨勢分析
trend = function(var, col){
  stat({{var}}) %>% 
    ggplot() + 
    geom_bar(mapping = aes(x = {{var}}, y = n), stat = "identity", fill = col) +
    geom_line(mapping = aes(x = {{var}}, y = ratio*554, group=1), size = 1.3, color = "#024349") + 
    scale_y_continuous(name = "Total", 
      sec.axis = sec_axis(~./554, name = "Turnover Rate", 
        labels = function(b) { paste0(round(b * 100, 0), "%")})) + 
    theme(
        axis.title.y = element_text(color = col),
        axis.title.y.right = element_text(color = "#024349"))
}
trend(year, "#FAD589")

4. Boxplot

  • 上三張圖都是為了看資料的分佈
  • 注重數量:hitogram
  • 注重比例:density(密度函數的好處就是將級距差太大的變數能放在一起看)
simps_cnt %>% 
  inner_join(afinn) %>% 
  group_by(episode, value) %>% 
  summarise(cnt = sum(count)) %>% ggplot() +
  geom_boxplot(aes(x=episode, y=value, colour=as.factor(episode))) +
  ggtitle("Afinn-每集情緒差異") + 
  scale_x_continuous(breaks=seq(1,16,1)) + # 將x軸全部的點都標出來 # 原本只有1,5,10,15
  theme(text=element_text(family="蘋方-繁 中黑體", size=12),
      plot.title=element_text(hjust = 0.5)) -> p_afinn
	  
plotly::ggplotly(p_afinn)

  • ggplot中的par(mfrow=c(1,2))
plot1 = df %>% ggplot(aes(sex,scale(itr))) +
  geom_boxplot() +
  scale_x_discrete(labels=c("male","female"))
plot2 = df %>% ggplot(aes(sex,scale(ip))) +
  geom_boxplot() +
  scale_x_discrete(labels=c("male","female"))
gridExtra::grid.arrange(plot1, plot2, nrow=1, ncol=2)

善用melt轉換

將原先三個欄位"sex","itr"和"ip"變為以下三欄位(更長):

  • sex=c("1","2")
  • variables=c("itr","ip")
  • value=c(1.1,1.4,2.3,對照的值)

即能用同樣的資料集,將上面的組合圖整合為一張圖

itr_ip = melt(df[,c(4:6)], id.var = "sex") 
itr_ip %>% 
  ggplot(aes(variable,scale(value))) +
  geom_boxplot(aes(fill=sex)) + 
  scale_fill_manual(values=c("#0099CC","#FF6666"))

Plot

  • 基本繪圖功能:

    • 類別變數的分布:直條圖 barplot()
    • 數量變數的分布:直方圖 hist()
    • 兩數量變數的關係:點狀圖 plot()
    • 比較各類別的數量分布:盒狀圖 boxplot()
    • 設定par(family="蘋方-繁 中黑體")
  • plot(Y~X, family="蘋方-繁 中黑體")

par(mfrow=c(2,2))
plot(SLID$wages~SLID$language)
plot(SLID$wages~SLID$age)
plot(SLID$wages~SLID$education)
plot(SLID$wages~SLID$sex)

  • barplot(data type: matrix | vector)
#兩行不同顏色為一組分類的長條圖 matrix[3x2]
ex_pr = as.matrix(table(df$expr,df$prov))
colnames(ex_pr) = c("correct","not_provide","incorrect") 
ex_pr[1,] = ex_pr[1,]/sum(ex_pr[1,]) # Scale to the same unit and check the propotion
ex_pr[2,] = ex_pr[2,]/sum(ex_pr[2,])
barplot(ex_pr, beside = T, legend.text = c("exp_Y","exp_N")) 

Word Cloud

wordcloud2(demoFreq, size = 2, minRotation = -pi/2, maxRotation = -pi/2) # 不是直排就是橫排, 緊密排成一個橢圓

Heatmap

table %>% d3heatmap(labrow = T, col=colorRamp(c('seagreen','lightyellow','red')), show_grid = F)
table(weekday=format(ts,'%w'), month=format(ts,'%m'))
​       month
weekday   01   02   03   04   05   06   07   08   09   10   11   12
​      0 2110 1837 2075 2070 2168 2239 2339 2304 2352 2424 2254 2144
​      1 2395 1937 2200 2323 2359 2187 2457 2288 2258 2399 2323 2271
​      2 2317 1885 2270 2118 2222 2183 2412 2251 2142 2416 2258 2317
​      3 2259 2007 2242 2060 2345 2347 2408 2428 2239 2484 2182 2415
​      4 2334 1904 2263 2099 2402 2190 2385 2464 2320 2280 2253 2425
​      5 2392 2036 2443 2388 2340 2566 2459 2591 2390 2692 2475 2512
​      6 2240 1905 2265 2222 2199 2290 2341 2246 2359 2391 2318 2342
library(d3heatmap)
table(format(ts,"%u"), format(ts,"%H")) %>% # %u: Week day(Monday is 1) # %H: Hours(00-23)
  as.data.frame.matrix %>% 
  d3heatmap(F,F,col=colorRamp(c('seagreen','lightyellow','red')))

pacman::p_load(manipulateWidget)  # install/load library
color = colorRamp(c('seagreen','lightyellow','red'))  # heatmap color
# make 2 heatmaps for each top5 location
L = lapply(top5, function(loc) {           
  weekday = format(ts5[TOP5$LocationDescription == loc],"%u")
  hour = format(ts5[TOP5$LocationDescription == loc], "%H")
  arrest = TOP5$Arrest[TOP5$LocationDescription == loc]
  list(count = as.data.frame.matrix( table(weekday, hour) ),
       arrest = tapply(arrest, list(weekday, hour), mean)) %>% 
    lapply(d3heatmap, F, F, col=color, show_grid=F, xaxis_height=8, yaxis_width=8)
  })
# plot 10 heatmaps at once
combineWidgets(
  list=do.call(c, L), ncol=2,
  title = "<h4>TOP5 7x24 : 竊盜案件數量 & 破案率</h4>",
  footer= "<p>由上至下:(1)住宅車道、(2)加油站、(3)巷弄、(4)公用停車場、(5)街道</p>"
  ) 

PCA Plot

Bubble plot

df = within(seo_ori, {bubblesize = seo_ori$avg_mon_search_volume})
g = df %>%
  ggplot(aes(x=company1, y=company2, col=category, label=keyword)) + 
  geom_point(aes(size=bubblesize), alpha=0.7) + 
  scale_size(range = c(1, 20)) +
  ggtitle("A Co. v.s. B Co. 的SEO排名表現") +
  theme(plot.title = element_text(hjust = 0.5))
ggplotly(g)  

Highcharter

Reference

treemap

stat = function(var){
  tree = df %>% 
    group_by({{var}}) %>% 
    summarise(n = n(), deal = sum(y)) %>% 
    mutate(ratio = deal/n) %>% 
    arrange(-ratio, -n, -deal)
  return(tree)
  }
  
hchart(stat(consultant), "treemap", hcaes(x = consultant, value = n, color = ratio))