---
disqus: ahb0222
GA : G-CQ4L16KHK4
---
# 使用R(RStudio Cloud)語言擷取觀測資料查詢系統氣象資料
> [color=#40f1ef][name=LHB阿好伯, 2020/02/09][:earth_africa:](https://www.facebook.com/LHB0222/)
###### tags: `R`
[TOC]
氣象局有一個[觀測資料查詢系統](https://e-service.cwb.gov.tw/HistoryDataQuery/)可以查詢各個氣象站的資料
剛好最近有學弟需要近兩年大寮測站的降雨資料
若是手動查詢則需要大量時間
所以我就幫忙寫了一段程式幫忙取得兩年間的降雨資料
以下是相關的程式碼
分享給大家
目前遇到一個問題在以google sheet作為資料庫中
DT::datatable中filter只能在前兩欄起作用
mutate新增的第三欄無法使用filter
若是將資料調換過來也是mutate新增的欄位沒有效果
![](https://i.imgur.com/ewkB55l.png)
# 抓取資料並輸出成CSV檔
```rust=
packages <- c("jsonlite", "rvest", "magrittr", "lubridate")
installed_packages <- packages %in% rownames(installed.packages())
if (any(installed_packages == FALSE)) {
install.packages(packages[!installed_packages])
}
# 載入所需套件
lapply(packages, library, character.only = TRUE) %>%
invisible()
# url <- "https://e-service.cwb.gov.tw/HistoryDataQuery/DayDataController.do?command=viewMain&station=C0V730&stname=%25E5%25A4%25A7%25E5%25AF%25AE&datepicker=2020-07-09#"
url_start <- "https://e-service.cwb.gov.tw/HistoryDataQuery/DayDataController.do?command=viewMain&station=C0V730&stname=%25E5%25A4%25A7%25E5%25AF%25AE&datepicker="
start_date <- ymd("2018-07-01")
end_date <- ymd("2020-07-31")
end_date - start_date
data1 <- data.frame()
data2 <- data.frame()
for(d in c(0:(end_date - start_date))){
url <- paste0(url_start,as.character(start_date + d),"#")
#時間資料
time_H <- url %>%
read_html() %>%
html_nodes(xpath='//*[(@id = "MyTable")]//td[(((count(preceding-sibling::*) + 1) = 1) and parent::*)]') %>%
html_text(trim = T) %>%
as.numeric()
#降雨資料
population <- url %>%
read_html() %>%
html_nodes(xpath='//td[(((count(preceding-sibling::*) + 1) = 11) and parent::*)]') %>%
html_text(trim = T) %>%
gsub("X","-9999", .) %>%
as.numeric()
data1 <- cbind(as.character(start_date + d), time_H, population)
data2 <- rbind(data2, data1)
write.csv(data1,file = paste0("C:/R/Data/", start_date + d, ".csv"))
}
write.csv(data2,file = paste0("C:/R/AllData.csv"))
```
# APP版本
```r=
# This is a Shiny web application. You can run the application by clicking
# the 'Run App' button above.
#
# Find out more about building applications with Shiny here:
#
# http://shiny.rstudio.com/
#
#packages <- c("jsonlite", "rvest", "magrittr", "lubridate")
#installed_packages <- packages %in% rownames(installed.packages())
#if (any(installed_packages == FALSE)) {
# install.packages(packages[!installed_packages])
#}
# 載入所需套件
#invisible(lapply(packages, library, character.only = TRUE))
library("jsonlite")
library("rvest")
library("magrittr")
library("lubridate")
library("shiny")
HistoryData <- function(start_date,end_date,Error_replaced){
url <- "https://e-service.cwb.gov.tw/HistoryDataQuery/DayDataController.do?command=viewMain&station=C0V730&stname=%25E5%25A4%25A7%25E5%25AF%25AE&datepicker=2020-07-09#"
url_start <- "https://e-service.cwb.gov.tw/HistoryDataQuery/DayDataController.do?command=viewMain&station=C0V730&stname=%25E5%25A4%25A7%25E5%25AF%25AE&datepicker="
end_date - start_date
data1 <- data.frame()
data2 <- data.frame()
for(d in c(0:(end_date - start_date))){
url <- paste0(url_start,as.character(start_date + d),"#")
#時間資料
time_H <- url %>%
read_html() %>%
html_nodes(xpath='//*[(@id = "MyTable")]//td[(((count(preceding-sibling::*) + 1) = 1) and parent::*)]') %>%
html_text(trim = T) %>%
as.numeric()
#降雨資料
population <- url %>%
read_html() %>%
html_nodes(xpath='//td[(((count(preceding-sibling::*) + 1) = 11) and parent::*)]') %>%
html_text(trim = T) %>%
gsub("X",Error_replaced, .) %>%
as.numeric()
#data1 <- cbind(as.character(start_date + d), time_H, population)
data3 <- cbind(as.character(start_date + d),sum(population))
data2 <- rbind(data3, data2)
#print(paste(as.character(start_date + d),data3))
#write.csv(data3,file = paste0("C:/R/Data/", start_date + d, ".csv"))
}
data2
}
# Define UI for application that draws a histogram
ui <- fluidPage(
titlePanel("查詢時間"),
# Default value is the date in client's time zone
# Application title
# Sidebar with a slider input for number of bins
sidebarLayout(
sidebarPanel(dateInput("start_date", "Date:"),
numericInput("Error_replaced", "測站錯誤資料:", -9999),
sliderInput("bins",
"回朔天數:",
min = 0,
max = 50,
value = 1)
),
# Show a plot of the generated distribution
mainPanel(
tableOutput("dataoutput")
)
)
)
# Define server logic required to draw a histogram
server <- function(input, output) {
output$dataoutput <- renderTable(HistoryData(input$start_date,input$start_date - input$bins,input$Error_replaced))
}
# Run the application
shinyApp(ui = ui, server = server)
```
## [展示網址](https://p56064055.shinyapps.io/E-serviceHistoryDataQuery/)
# 前七天加總資料
```rust=
#
# This is a Shiny web application. You can run the application by clicking
# the 'Run App' button above.
#
# Find out more about building applications with Shiny here:
#
# http://shiny.rstudio.com/
#
#packages <- c("jsonlite", "rvest", "magrittr", "lubridate")
#installed_packages <- packages %in% rownames(installed.packages())
#if (any(installed_packages == FALSE)) {
# install.packages(packages[!installed_packages])
#}
# 載入所需套件
#invisible(lapply(packages, library, character.only = TRUE))
library("jsonlite")
library("rvest")
library("magrittr")
library("lubridate")
library("shiny")
HistoryData <- function(start_date,Error_replaced){
# url <- "https://e-service.cwb.gov.tw/HistoryDataQuery/DayDataController.do?command=viewMain&station=C0V730&stname=%25E5%25A4%25A7%25E5%25AF%25AE&datepicker=2020-07-09#"
url_start <- "https://e-service.cwb.gov.tw/HistoryDataQuery/DayDataController.do?command=viewMain&station=C0V730&stname=%25E5%25A4%25A7%25E5%25AF%25AE&datepicker="
url <- paste0(url_start,as.character(start_date),"#")
#時間資料
# time_H <- url %>%
# read_html() %>%
# html_nodes(xpath='//*[(@id = "MyTable")]//td[(((count(preceding-sibling::*) + 1) = 1) and parent::*)]') %>%
# html_text(trim = T) %>%
# as.numeric()
#降雨資料
population <- url %>%
read_html() %>%
html_nodes(xpath='//td[(((count(preceding-sibling::*) + 1) = 11) and parent::*)]') %>%
html_text(trim = T) %>%
gsub("X",Error_replaced, .) %>%
as.numeric()
#data1 <- cbind(as.character(start_date + d), time_H, population)
sum(population)
#print(paste(as.character(start_date + d),data3))
#write.csv(data3,file = paste0("C:/R/Data/", start_date + d, ".csv"))
}
DayJudge <- function(startDate,BackDay,Error_replaced){
data4 <- data.frame()
data5 <- data.frame()
for (e in c(0:BackDay)) {
data4 <- cbind(as.character(startDate - e),
sum(HistoryData(startDate - e,Error_replaced),
HistoryData(startDate - e-1,Error_replaced),
HistoryData(startDate - e-2,Error_replaced),
HistoryData(startDate - e-3,Error_replaced),
HistoryData(startDate - e-4,Error_replaced),
HistoryData(startDate - e-5,Error_replaced),
HistoryData(startDate - e-6,Error_replaced)
)
)
data5 <- rbind(data4, data5)
}
data5
}
# Define UI for application that draws a histogram
ui <- fluidPage(
titlePanel("查詢前七天降雨加總"),
# Default value is the date in client's time zone
# Application title
# Sidebar with a slider input for number of bins
sidebarLayout(
sidebarPanel(dateInput("start_date", "Date:"),
numericInput("Error_replaced", "測站錯誤資料:", 0),
sliderInput("bins",
"回朔天數:",
min = 0,
max = 50,
value = 1)
),
# Show a plot of the generated distribution
mainPanel(
tableOutput("dataoutput")
)
)
)
# Define server logic required to draw a histogram
server <- function(input, output) {
output$dataoutput <- renderTable(DayJudge(input$start_date,input$bins,input$Error_replaced))
}
# Run the application
shinyApp(ui = ui, server = server)
```
## [展示網站](https://p56064055.shinyapps.io/TotalFromPreviousWeek/)
# 以google sheet作為資料庫
```rust=
#以google sheet做為資料庫
#install.packages("lubridate")
#install.packages("googlesheets4")
#install.packages("dplyr")
#install.packages("shiny")
#install.packages("DT")
#install.packages("magrittr")
library(lubridate)
library(googlesheets4)
library(dplyr)
library(shiny)
library(DT)
library(magrittr)
gs4_deauth()
#starttime <- 7
#endtime <- 17
google_sheet_URL <- "https://docs.google.com/spreadsheets/d/1cAEG8RqpJoDJukBY83ffK8fvdHSV2pGT3F1L9C8zQlE/edit?usp=sharing"
google_sheet_URL2 <- "https://docs.google.com/spreadsheets/d/1ZtHk0VsMcYMSXDOnUyBWcVJvv4qPJtah1Ylst7139D4/edit?usp=sharing"
gdata <- read_sheet(google_sheet_URL) #%>% as.data.frame()
gdata$`date-D` %<>% ymd()
gdata$time_H %<>% as.numeric()
head(gdata)
gdata2 <- read_sheet(google_sheet_URL2) %>% as.data.frame()
gdata2$`date-D` %<>% ymd()
gdata2$`population-mm` %<>% as.numeric()
gdata2$Last_Week_Total %<>% as.numeric()
head(gdata2)
ui <- fluidPage(
titlePanel("查詢降雨加總"),
dateRangeInput("daterange", "Date range:",
min = min(gdata$`date-D`),
max = max(gdata$`date-D`)),
mainPanel(
DTOutput("dataoutput")
))
server <- function(input, output) {
output$dataoutput <- renderDT(gdata[,-2] %>%
filter(gdata$`date-D`>= as.Date(input$daterange[1],"%Y-%m-%e") & gdata$`date-D`<= as.Date(input$daterange[2],"%Y-%m-%e")) %>%
group_by(`date-D`) %>%
summarise_all(sum) %>%
mutate(Last_Week_Total= gdata2 %>%
filter(gdata2$`date-D`>= as.Date(input$daterange[1],"%Y-%m-%e") & gdata2$`date-D`<= as.Date(input$daterange[2],"%Y-%m-%e")) %>%
select(`Last_Week_Total`)) %>%
DT::datatable(colnames =c("Date","Total rainfall","Last week total rainfall"),filter = 'top',extensions = 'Buttons', options = list(
dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel', 'pdf', 'print'))) %>%
formatStyle('population-mm',
color = styleInterval(0,c('blak','red'))) %>%
formatStyle('Last_Week_Total',
color = styleInterval(0,c('blak','red'))) %>%
formatRound('Last_Week_Total',1)
)
}
# Run the application
shinyApp(ui = ui, server = server)
```
## [展示網址](https://p56064055.shinyapps.io/DatabaseQuery/)
# 增加三天加總
```rust=
#以google sheet做為資料庫
#install.packages("lubridate")
#install.packages("googlesheets4")
#install.packages("dplyr")
#install.packages("shiny")
#install.packages("DT")
#install.packages("magrittr")
library(lubridate)
library(googlesheets4)
library(dplyr)
library(shiny)
library(DT)
library(magrittr)
gs4_deauth()
#starttime <- 7
#endtime <- 17
google_sheet_URL <- "https://docs.google.com/spreadsheets/d/1cAEG8RqpJoDJukBY83ffK8fvdHSV2pGT3F1L9C8zQlE/edit?usp=sharing"
google_sheet_URL2 <- "https://docs.google.com/spreadsheets/d/1ZtHk0VsMcYMSXDOnUyBWcVJvv4qPJtah1Ylst7139D4/edit?usp=sharing"
google_sheet_URL3 <- "https://docs.google.com/spreadsheets/d/1Hz3HpnzL4jP7BEqFw-NqAk1C53WuMjdVttaQGQAeds8/edit?usp=sharing"
gdata <- read_sheet(google_sheet_URL) #%>% as.data.frame()
gdata$`date-D` %<>% ymd()
gdata$time_H %<>% as.numeric()
head(gdata)
gdata2 <- read_sheet(google_sheet_URL2) %>% as.data.frame()
gdata2$`date-D` %<>% ymd()
gdata2$`population-mm` %<>% as.numeric()
gdata2$Last_Week_Total %<>% as.numeric()
head(gdata2)
gdata3 <- read_sheet(google_sheet_URL3) %>% as.data.frame()
gdata3$`date-D` %<>% ymd()
gdata3$`population-mm` %<>% as.numeric()
gdata3$Last_3Day_Total %<>% as.numeric()
ui <- fluidPage(
titlePanel("查詢降雨加總"),
dateRangeInput("daterange", "Date range:",
min = min(gdata$`date-D`),
max = max(gdata$`date-D`)),
mainPanel(
DTOutput("dataoutput")
))
server <- function(input, output) {
output$dataoutput <- renderDT(gdata[,-2] %>%
filter(gdata$`date-D`>= as.Date(input$daterange[1],"%Y-%m-%e") & gdata$`date-D`<= as.Date(input$daterange[2],"%Y-%m-%e")) %>%
group_by(`date-D`) %>%
summarise_all(sum) %>%
mutate(Last_Week_Total= gdata2 %>%
filter(gdata2$`date-D`>= as.Date(input$daterange[1],"%Y-%m-%e") & gdata2$`date-D`<= as.Date(input$daterange[2],"%Y-%m-%e")) %>%
select(`Last_Week_Total`)) %>%
mutate(Last_3Day_Total= gdata3 %>%
filter(gdata3$`date-D`>= as.Date(input$daterange[1],"%Y-%m-%e") & gdata3$`date-D`<= as.Date(input$daterange[2],"%Y-%m-%e")) %>%
select(`Last_3Day_Total`)) %>%
DT::datatable(colnames =c("Date","Total rainfall","Last week total rainfall","Last 3 day total rainfall"),filter = 'top',extensions = 'Buttons', options = list(
dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel', 'pdf', 'print'))) %>%
formatStyle('population-mm',
color = styleInterval(0,c('blak','red'))) %>%
formatStyle('Last_Week_Total',
color = styleInterval(0,c('blak','red'))) %>%
formatRound('Last_Week_Total',1)
)
}
# Run the application
shinyApp(ui = ui, server = server)
```
# 參考資料
[輕鬆學習 R 語言:網頁資料擷取](https://medium.com/datainpoint/r-essentials-web-scraping-8d0222c1e8d5)
全文分享至
https://www.facebook.com/LHB0222/
有疑問想討論的都歡迎於下方留言
喜歡的幫我分享給所有的朋友 \o/
有所錯誤歡迎指教
# [:page_with_curl: 全部文章列表](https://hackmd.io/@LHB-0222/AllWritings)
![](https://i.imgur.com/47HlvGH.png)