R
Visualization
Other Reference:
Rpubs
date: '`r Sys.Date()`'
output: rmdformats::material # readthedown
date: "`r Sys.Date()`"
output:
prettydoc::html_pretty:
number_sections: yes
theme: cayman # hpstr
toc: true
output:
html_document:
highlight: pygments
theme: flatly
css: style.css
toc: true
toc_depth: 3
toc_float:
collapsed: false
library(corrplot)
dfcor = cor(df[,c(4:5,7,10:11)])
round(dfcor, 2)
symnum(dfcor, abbr.colnames = F)
corrplot(dfcor, type = "upper", order = "hclust", tl.col = "black", tl.srt = 0)
corrplot.mixed(cor(df[,c(4:5,7,10:11)]), upper="ellipse")
col = colorRampPalette(c("blue", "white", "red"))(20)
heatmap(x = res, col = col, symm = TRUE) # symm表示是否對稱
library(PerformanceAnalytics)
chart.Correlation(mydata, histogram = T, pch=19)
ggplot()+
theme(text=element_text(family="蘋方-繁 中黑體", size=12),
plot.title=element_text(hjust = 0.5))
ZM = Z0 %>% mutate(month = format(as.Date(date),format="%m"))
age_ratio = as.data.frame(prop.table(table(ZM$age,ZM$month)))
ZM = left_join(ZM, age_ratio, by=c("age"="Var1", "month"="Var2"))
ggplot(ZM, aes(x = month, y = Freq, group = age, color=age)) +
geom_point() +
geom_line() # 沒有做group會無法把點跟點一起連線
# 熱門男生名字的『比例』
mtop = mbaby %>% group_by(year) %>% top_n(1, number) %>%
pull(name) %>% unique()
mbaby %>% group_by(year) %>%
mutate(fraction = number / sum(number)) %>%
filter(name %in% mtop) %>%
ggplot(aes(x=year,y=fraction,col=name)) +
geom_line(alpha=0.5) + geom_point(size=0.5) -> g
ggplotly(g)
ggplot(ex1221, aes(Discharge, Area)) +
geom_point(aes(size=NO3)) +
xlab("My x label") +
ylab("My y label") +
ggtitle("Weighted Scatterplot of Watershed Area vs. Discharge and Nitrogen Levels (PPM)")
ggplot(ex1221, aes(Discharge, Area)) +
geom_point(aes(size=NO3)) +
labs(size= "Nitrogen",
x = "My x label",
y = "My y label",
title = "Weighted Scatterplot of Watershed Area vs. Discharge and Nitrogen Levels (PPM)")
df %>%
ggplot(aes(as.factor(mon), fill = dept)) +
geom_bar() +
geom_text(stat = "count", aes(label = (..count..)), vjust=0.5, position=position_dodge(width=1), vjust=-0.25, size=4) +
theme(legend.position='none',
text = element_text(family="蘋方-繁 中黑體", size=12),
plot.title = element_text(size=24, hjust = 0.5)) +
xlab("實習時長(月數)") +
ylab("人數") +
facet_wrap(~ dept, scales = "free") +
ggtitle("實習時長分析-依各系")
df %>%
ggplot(aes(as.factor(mon), fill = dept)) +
geom_bar() +
geom_text(stat = "count", aes(label = (..count..)), position=position_stack(vjust=0.5), size=4) +
theme(text = element_text(family="蘋方-繁 中黑體", size=12),
plot.title = element_text(size=24, hjust = 0.5)) +
xlab("月數") +
ylab("人數") +
ggtitle("實習時長分析-依月數(加總)")
df %>% ggplot(aes(prov, fill = expr)) +
geom_bar(position = 'dodge') +
scale_x_discrete(labels=c("correct","not_provide","incorrect"))
ggplot(aes(x= artDate,y=sentiment,fill=method)) +
geom_col(show.legend = FALSE) + # 黏在一起的長條圖(分顏色)
scale_x_date(labels = date_format("%m/%d")) + # 日期format
# 依照method切開長條圖成兩張scale一樣的圖, col數選1就是上下, 2為左右
facet_wrap(~method, ncol = 2, scales = "fixed")+
geom_text(aes(label=sentiment)) # 標數值
word_count %>%
inner_join(LIWC_ch) %>%
group_by(sentiment) %>%
top_n(10,wt = count) %>% # wt: Use for ordering. If not specified, defaults to the last variable in the tbl.
ungroup() %>% # 先ungroup才可以重新排列
mutate(word = reorder(word, count)) %>%
ggplot(aes(word, count, fill = sentiment)) +
geom_col(show.legend = FALSE) +
facet_wrap(~sentiment, scales = "free_y") +
labs(y = "Contribution to sentiment",
x = NULL) +
theme(text=element_text(family="蘋方-繁 中黑體", size=14))+
coord_flip() # 直轉橫
df %>%
group_by(dept) %>%
count(comDept) %>%
ggplot(aes(comDept, n, fill=dept)) +
geom_col() +
geom_text(aes(y=n, label=n), position=position_dodge(width=1), vjust=-0.25, size=4) +
theme(legend.position='none',
text = element_text(family="蘋方-繁 中黑體", size=12),
plot.title = element_text(size=24, hjust = 0.5)) +
xlab("實習部門") +
ylab("人數") +
facet_wrap(~ dept, scales = "free") +
ggtitle("各系實習部門分析-依系所")
trend = function(var, col){
stat({{var}}) %>%
ggplot() +
geom_bar(mapping = aes(x = {{var}}, y = n), stat = "identity", fill = col) +
geom_line(mapping = aes(x = {{var}}, y = ratio*554, group=1), size = 1.3, color = "#024349") +
scale_y_continuous(name = "Total",
sec.axis = sec_axis(~./554, name = "Turnover Rate",
labels = function(b) { paste0(round(b * 100, 0), "%")})) +
theme(
axis.title.y = element_text(color = col),
axis.title.y.right = element_text(color = "#024349"))
}
trend(year, "#FAD589")
simps_cnt %>%
inner_join(afinn) %>%
group_by(episode, value) %>%
summarise(cnt = sum(count)) %>% ggplot() +
geom_boxplot(aes(x=episode, y=value, colour=as.factor(episode))) +
ggtitle("Afinn-每集情緒差異") +
scale_x_continuous(breaks=seq(1,16,1)) + # 將x軸全部的點都標出來 # 原本只有1,5,10,15
theme(text=element_text(family="蘋方-繁 中黑體", size=12),
plot.title=element_text(hjust = 0.5)) -> p_afinn
plotly::ggplotly(p_afinn)
plot1 = df %>% ggplot(aes(sex,scale(itr))) +
geom_boxplot() +
scale_x_discrete(labels=c("male","female"))
plot2 = df %>% ggplot(aes(sex,scale(ip))) +
geom_boxplot() +
scale_x_discrete(labels=c("male","female"))
gridExtra::grid.arrange(plot1, plot2, nrow=1, ncol=2)
將原先三個欄位"sex","itr"和"ip"變為以下三欄位(更長):
即能用同樣的資料集,將上面的組合圖整合為一張圖
itr_ip = melt(df[,c(4:6)], id.var = "sex")
itr_ip %>%
ggplot(aes(variable,scale(value))) +
geom_boxplot(aes(fill=sex)) +
scale_fill_manual(values=c("#0099CC","#FF6666"))
基本繪圖功能:
plot(Y~X, family="蘋方-繁 中黑體")
par(mfrow=c(2,2))
plot(SLID$wages~SLID$language)
plot(SLID$wages~SLID$age)
plot(SLID$wages~SLID$education)
plot(SLID$wages~SLID$sex)
#兩行不同顏色為一組分類的長條圖 matrix[3x2]
ex_pr = as.matrix(table(df$expr,df$prov))
colnames(ex_pr) = c("correct","not_provide","incorrect")
ex_pr[1,] = ex_pr[1,]/sum(ex_pr[1,]) # Scale to the same unit and check the propotion
ex_pr[2,] = ex_pr[2,]/sum(ex_pr[2,])
barplot(ex_pr, beside = T, legend.text = c("exp_Y","exp_N"))
wordcloud2(demoFreq, size = 2, minRotation = -pi/2, maxRotation = -pi/2) # 不是直排就是橫排, 緊密排成一個橢圓
table %>% d3heatmap(labrow = T, col=colorRamp(c('seagreen','lightyellow','red')), show_grid = F)
table(weekday=format(ts,'%w'), month=format(ts,'%m'))
month
weekday 01 02 03 04 05 06 07 08 09 10 11 12
0 2110 1837 2075 2070 2168 2239 2339 2304 2352 2424 2254 2144
1 2395 1937 2200 2323 2359 2187 2457 2288 2258 2399 2323 2271
2 2317 1885 2270 2118 2222 2183 2412 2251 2142 2416 2258 2317
3 2259 2007 2242 2060 2345 2347 2408 2428 2239 2484 2182 2415
4 2334 1904 2263 2099 2402 2190 2385 2464 2320 2280 2253 2425
5 2392 2036 2443 2388 2340 2566 2459 2591 2390 2692 2475 2512
6 2240 1905 2265 2222 2199 2290 2341 2246 2359 2391 2318 2342
library(d3heatmap)
table(format(ts,"%u"), format(ts,"%H")) %>% # %u: Week day(Monday is 1) # %H: Hours(00-23)
as.data.frame.matrix %>%
d3heatmap(F,F,col=colorRamp(c('seagreen','lightyellow','red')))
pacman::p_load(manipulateWidget) # install/load library
color = colorRamp(c('seagreen','lightyellow','red')) # heatmap color
# make 2 heatmaps for each top5 location
L = lapply(top5, function(loc) {
weekday = format(ts5[TOP5$LocationDescription == loc],"%u")
hour = format(ts5[TOP5$LocationDescription == loc], "%H")
arrest = TOP5$Arrest[TOP5$LocationDescription == loc]
list(count = as.data.frame.matrix( table(weekday, hour) ),
arrest = tapply(arrest, list(weekday, hour), mean)) %>%
lapply(d3heatmap, F, F, col=color, show_grid=F, xaxis_height=8, yaxis_width=8)
})
# plot 10 heatmaps at once
combineWidgets(
list=do.call(c, L), ncol=2,
title = "<h4>TOP5 7x24 : 竊盜案件數量 & 破案率</h4>",
footer= "<p>由上至下:(1)住宅車道、(2)加油站、(3)巷弄、(4)公用停車場、(5)街道</p>"
)
df = within(seo_ori, {bubblesize = seo_ori$avg_mon_search_volume})
g = df %>%
ggplot(aes(x=company1, y=company2, col=category, label=keyword)) +
geom_point(aes(size=bubblesize), alpha=0.7) +
scale_size(range = c(1, 20)) +
ggtitle("A Co. v.s. B Co. 的SEO排名表現") +
theme(plot.title = element_text(hjust = 0.5))
ggplotly(g)
stat = function(var){
tree = df %>%
group_by({{var}}) %>%
summarise(n = n(), deal = sum(y)) %>%
mutate(ratio = deal/n) %>%
arrange(-ratio, -n, -deal)
return(tree)
}
hchart(stat(consultant), "treemap", hcaes(x = consultant, value = n, color = ratio))