--- disqus: ahb0222 GA : G-CQ4L16KHK4 --- # 使用R(RStudio Cloud)語言擷取觀測資料查詢系統氣象資料 > [color=#40f1ef][name=LHB阿好伯, 2020/02/09][:earth_africa:](https://www.facebook.com/LHB0222/) ###### tags: `R` [TOC] 氣象局有一個[觀測資料查詢系統](https://e-service.cwb.gov.tw/HistoryDataQuery/)可以查詢各個氣象站的資料 剛好最近有學弟需要近兩年大寮測站的降雨資料 若是手動查詢則需要大量時間 所以我就幫忙寫了一段程式幫忙取得兩年間的降雨資料 以下是相關的程式碼 分享給大家 目前遇到一個問題在以google sheet作為資料庫中 DT::datatable中filter只能在前兩欄起作用 mutate新增的第三欄無法使用filter 若是將資料調換過來也是mutate新增的欄位沒有效果 ![](https://i.imgur.com/ewkB55l.png) # 抓取資料並輸出成CSV檔 ```rust= packages <- c("jsonlite", "rvest", "magrittr", "lubridate") installed_packages <- packages %in% rownames(installed.packages()) if (any(installed_packages == FALSE)) { install.packages(packages[!installed_packages]) } # 載入所需套件 lapply(packages, library, character.only = TRUE) %>% invisible() # url <- "https://e-service.cwb.gov.tw/HistoryDataQuery/DayDataController.do?command=viewMain&station=C0V730&stname=%25E5%25A4%25A7%25E5%25AF%25AE&datepicker=2020-07-09#" url_start <- "https://e-service.cwb.gov.tw/HistoryDataQuery/DayDataController.do?command=viewMain&station=C0V730&stname=%25E5%25A4%25A7%25E5%25AF%25AE&datepicker=" start_date <- ymd("2018-07-01") end_date <- ymd("2020-07-31") end_date - start_date data1 <- data.frame() data2 <- data.frame() for(d in c(0:(end_date - start_date))){ url <- paste0(url_start,as.character(start_date + d),"#") #時間資料 time_H <- url %>% read_html() %>% html_nodes(xpath='//*[(@id = "MyTable")]//td[(((count(preceding-sibling::*) + 1) = 1) and parent::*)]') %>% html_text(trim = T) %>% as.numeric() #降雨資料 population <- url %>% read_html() %>% html_nodes(xpath='//td[(((count(preceding-sibling::*) + 1) = 11) and parent::*)]') %>% html_text(trim = T) %>% gsub("X","-9999", .) %>% as.numeric() data1 <- cbind(as.character(start_date + d), time_H, population) data2 <- rbind(data2, data1) write.csv(data1,file = paste0("C:/R/Data/", start_date + d, ".csv")) } write.csv(data2,file = paste0("C:/R/AllData.csv")) ``` # APP版本 ```r= # This is a Shiny web application. You can run the application by clicking # the 'Run App' button above. # # Find out more about building applications with Shiny here: # # http://shiny.rstudio.com/ # #packages <- c("jsonlite", "rvest", "magrittr", "lubridate") #installed_packages <- packages %in% rownames(installed.packages()) #if (any(installed_packages == FALSE)) { # install.packages(packages[!installed_packages]) #} # 載入所需套件 #invisible(lapply(packages, library, character.only = TRUE)) library("jsonlite") library("rvest") library("magrittr") library("lubridate") library("shiny") HistoryData <- function(start_date,end_date,Error_replaced){ url <- "https://e-service.cwb.gov.tw/HistoryDataQuery/DayDataController.do?command=viewMain&station=C0V730&stname=%25E5%25A4%25A7%25E5%25AF%25AE&datepicker=2020-07-09#" url_start <- "https://e-service.cwb.gov.tw/HistoryDataQuery/DayDataController.do?command=viewMain&station=C0V730&stname=%25E5%25A4%25A7%25E5%25AF%25AE&datepicker=" end_date - start_date data1 <- data.frame() data2 <- data.frame() for(d in c(0:(end_date - start_date))){ url <- paste0(url_start,as.character(start_date + d),"#") #時間資料 time_H <- url %>% read_html() %>% html_nodes(xpath='//*[(@id = "MyTable")]//td[(((count(preceding-sibling::*) + 1) = 1) and parent::*)]') %>% html_text(trim = T) %>% as.numeric() #降雨資料 population <- url %>% read_html() %>% html_nodes(xpath='//td[(((count(preceding-sibling::*) + 1) = 11) and parent::*)]') %>% html_text(trim = T) %>% gsub("X",Error_replaced, .) %>% as.numeric() #data1 <- cbind(as.character(start_date + d), time_H, population) data3 <- cbind(as.character(start_date + d),sum(population)) data2 <- rbind(data3, data2) #print(paste(as.character(start_date + d),data3)) #write.csv(data3,file = paste0("C:/R/Data/", start_date + d, ".csv")) } data2 } # Define UI for application that draws a histogram ui <- fluidPage( titlePanel("查詢時間"), # Default value is the date in client's time zone # Application title # Sidebar with a slider input for number of bins sidebarLayout( sidebarPanel(dateInput("start_date", "Date:"), numericInput("Error_replaced", "測站錯誤資料:", -9999), sliderInput("bins", "回朔天數:", min = 0, max = 50, value = 1) ), # Show a plot of the generated distribution mainPanel( tableOutput("dataoutput") ) ) ) # Define server logic required to draw a histogram server <- function(input, output) { output$dataoutput <- renderTable(HistoryData(input$start_date,input$start_date - input$bins,input$Error_replaced)) } # Run the application shinyApp(ui = ui, server = server) ``` ## [展示網址](https://p56064055.shinyapps.io/E-serviceHistoryDataQuery/) # 前七天加總資料 ```rust= # # This is a Shiny web application. You can run the application by clicking # the 'Run App' button above. # # Find out more about building applications with Shiny here: # # http://shiny.rstudio.com/ # #packages <- c("jsonlite", "rvest", "magrittr", "lubridate") #installed_packages <- packages %in% rownames(installed.packages()) #if (any(installed_packages == FALSE)) { # install.packages(packages[!installed_packages]) #} # 載入所需套件 #invisible(lapply(packages, library, character.only = TRUE)) library("jsonlite") library("rvest") library("magrittr") library("lubridate") library("shiny") HistoryData <- function(start_date,Error_replaced){ # url <- "https://e-service.cwb.gov.tw/HistoryDataQuery/DayDataController.do?command=viewMain&station=C0V730&stname=%25E5%25A4%25A7%25E5%25AF%25AE&datepicker=2020-07-09#" url_start <- "https://e-service.cwb.gov.tw/HistoryDataQuery/DayDataController.do?command=viewMain&station=C0V730&stname=%25E5%25A4%25A7%25E5%25AF%25AE&datepicker=" url <- paste0(url_start,as.character(start_date),"#") #時間資料 # time_H <- url %>% # read_html() %>% # html_nodes(xpath='//*[(@id = "MyTable")]//td[(((count(preceding-sibling::*) + 1) = 1) and parent::*)]') %>% # html_text(trim = T) %>% # as.numeric() #降雨資料 population <- url %>% read_html() %>% html_nodes(xpath='//td[(((count(preceding-sibling::*) + 1) = 11) and parent::*)]') %>% html_text(trim = T) %>% gsub("X",Error_replaced, .) %>% as.numeric() #data1 <- cbind(as.character(start_date + d), time_H, population) sum(population) #print(paste(as.character(start_date + d),data3)) #write.csv(data3,file = paste0("C:/R/Data/", start_date + d, ".csv")) } DayJudge <- function(startDate,BackDay,Error_replaced){ data4 <- data.frame() data5 <- data.frame() for (e in c(0:BackDay)) { data4 <- cbind(as.character(startDate - e), sum(HistoryData(startDate - e,Error_replaced), HistoryData(startDate - e-1,Error_replaced), HistoryData(startDate - e-2,Error_replaced), HistoryData(startDate - e-3,Error_replaced), HistoryData(startDate - e-4,Error_replaced), HistoryData(startDate - e-5,Error_replaced), HistoryData(startDate - e-6,Error_replaced) ) ) data5 <- rbind(data4, data5) } data5 } # Define UI for application that draws a histogram ui <- fluidPage( titlePanel("查詢前七天降雨加總"), # Default value is the date in client's time zone # Application title # Sidebar with a slider input for number of bins sidebarLayout( sidebarPanel(dateInput("start_date", "Date:"), numericInput("Error_replaced", "測站錯誤資料:", 0), sliderInput("bins", "回朔天數:", min = 0, max = 50, value = 1) ), # Show a plot of the generated distribution mainPanel( tableOutput("dataoutput") ) ) ) # Define server logic required to draw a histogram server <- function(input, output) { output$dataoutput <- renderTable(DayJudge(input$start_date,input$bins,input$Error_replaced)) } # Run the application shinyApp(ui = ui, server = server) ``` ## [展示網站](https://p56064055.shinyapps.io/TotalFromPreviousWeek/) # 以google sheet作為資料庫 ```rust= #以google sheet做為資料庫 #install.packages("lubridate") #install.packages("googlesheets4") #install.packages("dplyr") #install.packages("shiny") #install.packages("DT") #install.packages("magrittr") library(lubridate) library(googlesheets4) library(dplyr) library(shiny) library(DT) library(magrittr) gs4_deauth() #starttime <- 7 #endtime <- 17 google_sheet_URL <- "https://docs.google.com/spreadsheets/d/1cAEG8RqpJoDJukBY83ffK8fvdHSV2pGT3F1L9C8zQlE/edit?usp=sharing" google_sheet_URL2 <- "https://docs.google.com/spreadsheets/d/1ZtHk0VsMcYMSXDOnUyBWcVJvv4qPJtah1Ylst7139D4/edit?usp=sharing" gdata <- read_sheet(google_sheet_URL) #%>% as.data.frame() gdata$`date-D` %<>% ymd() gdata$time_H %<>% as.numeric() head(gdata) gdata2 <- read_sheet(google_sheet_URL2) %>% as.data.frame() gdata2$`date-D` %<>% ymd() gdata2$`population-mm` %<>% as.numeric() gdata2$Last_Week_Total %<>% as.numeric() head(gdata2) ui <- fluidPage( titlePanel("查詢降雨加總"), dateRangeInput("daterange", "Date range:", min = min(gdata$`date-D`), max = max(gdata$`date-D`)), mainPanel( DTOutput("dataoutput") )) server <- function(input, output) { output$dataoutput <- renderDT(gdata[,-2] %>% filter(gdata$`date-D`>= as.Date(input$daterange[1],"%Y-%m-%e") & gdata$`date-D`<= as.Date(input$daterange[2],"%Y-%m-%e")) %>% group_by(`date-D`) %>% summarise_all(sum) %>% mutate(Last_Week_Total= gdata2 %>% filter(gdata2$`date-D`>= as.Date(input$daterange[1],"%Y-%m-%e") & gdata2$`date-D`<= as.Date(input$daterange[2],"%Y-%m-%e")) %>% select(`Last_Week_Total`)) %>% DT::datatable(colnames =c("Date","Total rainfall","Last week total rainfall"),filter = 'top',extensions = 'Buttons', options = list( dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel', 'pdf', 'print'))) %>% formatStyle('population-mm', color = styleInterval(0,c('blak','red'))) %>% formatStyle('Last_Week_Total', color = styleInterval(0,c('blak','red'))) %>% formatRound('Last_Week_Total',1) ) } # Run the application shinyApp(ui = ui, server = server) ``` ## [展示網址](https://p56064055.shinyapps.io/DatabaseQuery/) # 增加三天加總 ```rust= #以google sheet做為資料庫 #install.packages("lubridate") #install.packages("googlesheets4") #install.packages("dplyr") #install.packages("shiny") #install.packages("DT") #install.packages("magrittr") library(lubridate) library(googlesheets4) library(dplyr) library(shiny) library(DT) library(magrittr) gs4_deauth() #starttime <- 7 #endtime <- 17 google_sheet_URL <- "https://docs.google.com/spreadsheets/d/1cAEG8RqpJoDJukBY83ffK8fvdHSV2pGT3F1L9C8zQlE/edit?usp=sharing" google_sheet_URL2 <- "https://docs.google.com/spreadsheets/d/1ZtHk0VsMcYMSXDOnUyBWcVJvv4qPJtah1Ylst7139D4/edit?usp=sharing" google_sheet_URL3 <- "https://docs.google.com/spreadsheets/d/1Hz3HpnzL4jP7BEqFw-NqAk1C53WuMjdVttaQGQAeds8/edit?usp=sharing" gdata <- read_sheet(google_sheet_URL) #%>% as.data.frame() gdata$`date-D` %<>% ymd() gdata$time_H %<>% as.numeric() head(gdata) gdata2 <- read_sheet(google_sheet_URL2) %>% as.data.frame() gdata2$`date-D` %<>% ymd() gdata2$`population-mm` %<>% as.numeric() gdata2$Last_Week_Total %<>% as.numeric() head(gdata2) gdata3 <- read_sheet(google_sheet_URL3) %>% as.data.frame() gdata3$`date-D` %<>% ymd() gdata3$`population-mm` %<>% as.numeric() gdata3$Last_3Day_Total %<>% as.numeric() ui <- fluidPage( titlePanel("查詢降雨加總"), dateRangeInput("daterange", "Date range:", min = min(gdata$`date-D`), max = max(gdata$`date-D`)), mainPanel( DTOutput("dataoutput") )) server <- function(input, output) { output$dataoutput <- renderDT(gdata[,-2] %>% filter(gdata$`date-D`>= as.Date(input$daterange[1],"%Y-%m-%e") & gdata$`date-D`<= as.Date(input$daterange[2],"%Y-%m-%e")) %>% group_by(`date-D`) %>% summarise_all(sum) %>% mutate(Last_Week_Total= gdata2 %>% filter(gdata2$`date-D`>= as.Date(input$daterange[1],"%Y-%m-%e") & gdata2$`date-D`<= as.Date(input$daterange[2],"%Y-%m-%e")) %>% select(`Last_Week_Total`)) %>% mutate(Last_3Day_Total= gdata3 %>% filter(gdata3$`date-D`>= as.Date(input$daterange[1],"%Y-%m-%e") & gdata3$`date-D`<= as.Date(input$daterange[2],"%Y-%m-%e")) %>% select(`Last_3Day_Total`)) %>% DT::datatable(colnames =c("Date","Total rainfall","Last week total rainfall","Last 3 day total rainfall"),filter = 'top',extensions = 'Buttons', options = list( dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel', 'pdf', 'print'))) %>% formatStyle('population-mm', color = styleInterval(0,c('blak','red'))) %>% formatStyle('Last_Week_Total', color = styleInterval(0,c('blak','red'))) %>% formatRound('Last_Week_Total',1) ) } # Run the application shinyApp(ui = ui, server = server) ``` # 參考資料 [輕鬆學習 R 語言:網頁資料擷取](https://medium.com/datainpoint/r-essentials-web-scraping-8d0222c1e8d5) 全文分享至 https://www.facebook.com/LHB0222/ 有疑問想討論的都歡迎於下方留言 喜歡的幫我分享給所有的朋友 \o/ 有所錯誤歡迎指教 # [:page_with_curl: 全部文章列表](https://hackmd.io/@LHB-0222/AllWritings) ![](https://i.imgur.com/47HlvGH.png)