Try   HackMD

使用 Python Selenium 爬取 CODiS 氣候資料服務系統資料,並使用 ggplot2 繪製風花圖

LHB阿好伯, 2024/01/20

# Drive Chrome through the CWA CODiS station-data page with Selenium and
# repeatedly click two page buttons, 100 times per loop.
#
# NOTE(review): every locator below is an absolute XPath taken from the page
# layout at the time of writing; they will break if the site markup changes.
import time

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select, WebDriverWait

service = Service("C:/Users/gtgrt/chromedriver_win32/chromedriver.exe")
driver = webdriver.Chrome(service=service)
driver.get("https://codis.cwa.gov.tw/StationData")

element = driver.find_element(By.XPATH, '//*[@id="auto_C0"]')
element.click()

# Locate the drop-down list element and wrap it in a Select helper.
select_element = driver.find_element(By.XPATH, '//*[@id="station_area"]')
select = Select(select_element)
# Choose the option at index 15.
# NOTE(review): presumably the entry for Kaohsiung - confirm against the page.
select.select_by_index(15)

# Locate the <input> element and type the station name into it.
input_element = driver.find_element(
    By.XPATH,
    '/html/body/div/main/div/div/div/div/aside/div/div[1]/div/div/section/ul/li[5]/div/div[2]/div/input',
)
input_element.send_keys("高雄 (467441)")

# Locate and click the first button.
button_element = driver.find_element(
    By.XPATH,
    '/html/body/div[1]/main/div/div/section[1]/div[1]/div[3]/div[1]/div[1]/div[11]/div/div/div/div[2]',
)
button_element.click()

# Locate and click the second button.
button_element = driver.find_element(
    By.XPATH,
    '/html/body/div[1]/main/div/div/section[1]/div[1]/div[3]/div[1]/div[1]/div[6]/div/div[1]/div/button',
)
button_element.click()

# The two buttons that are clicked repeatedly in the loops below.
# NOTE(review): presumably button2 changes the displayed date and button
# triggers the CSV download - confirm against the page.
button_XPATH = "/html/body/div[1]/main/div/div/section[2]/div/div/section/div[5]/div[1]/div[2]/div"
button_element = driver.find_element(By.XPATH, button_XPATH)
button_element.click()

button2_XPATH = '/html/body/div[1]/main/div/div/section[2]/div/div/section/div[5]/div[1]/div[1]/label/div/div[1]'
button2_element = driver.find_element(By.XPATH, button2_XPATH)

# First pass: click button2, then wait (up to 5 s) for the other button to be
# present before clicking it.
for i in range(0, 100):
    print(f"進行中:{i+1}/100")  # progress indicator
    button2_element.click()
    try:
        button_element = WebDriverWait(driver, 5).until(
            EC.presence_of_element_located((By.XPATH, button_XPATH))
        )
    except TimeoutException:
        # The wait timed out; report it and fall back to the element
        # reference obtained earlier.
        print("找不到指定的元素。")
    button_element.click()
    time.sleep(1)

# Second pass: same click sequence, but with a fixed 5-second pause instead of
# an explicit wait.
# NOTE(review): this may have been meant as an alternative to the loop above
# rather than a follow-up to it - confirm.
for i in range(0, 100):
    print(f"進行中:{i+1}/100")  # progress indicator
    button2_element.click()
    time.sleep(5)
    button_element.click()
    time.sleep(1)

Image not showing. Possible reasons:
  • The image was uploaded to a note which you don't have access to
  • The note which the image was originally uploaded to has been deleted
Learn More →

# Import the CSV files stored in
# C:\Users\gtgrt\OneDrive\00重要文件\成大碩士\06部落格\002_R語言\02_專案\中央氣象
library(readr)
library(dplyr)
library(ggplot2)

# Read the data ----

# List the full paths of all CSV files. `pattern` is a regular expression,
# not a glob, so anchor on the ".csv" extension explicitly.
file.list <- list.files(
  path = "C:/Users/gtgrt/OneDrive/00重要文件/成大碩士/06部落格/002_R語言/02_專案/中央氣象",
  pattern = "\\.csv$",
  full.names = TRUE
)

# Read one CSV file and tag every row with values taken from the file name.
#
# The base name is split on "-" and "."; the first piece is stored in the
# `code` column and pieces 2 to 4, joined with "-", in the `date` column.
# NOTE(review): presumably names look like <station>-<yyyy>-<mm>-<dd>.csv -
# confirm against the downloaded files.
#
# file_path: path to a single CSV file.
# Returns the data frame from read.csv() with `code` and `date` added.
read_and_add_info <- function(file_path) {
  file_name <- basename(file_path)
  parts <- strsplit(file_name, "-|\\.")[[1]]

  data <- read.csv(file_path, stringsAsFactors = FALSE)
  data$code <- parts[1]
  data$date <- paste(parts[2], parts[3], parts[4], sep = "-")
  data
}

# Read every file and stack the results into one data frame.
combined_data <- lapply(file.list, read_and_add_info) %>%
  bind_rows()

# Inspect the combined data frame.
head(combined_data)
str(combined_data)

# Temperature plots ----

# Make sure temperature is numeric and the date column is a Date.
combined_data$氣溫... <- as.numeric(combined_data$氣溫...)
combined_data$date <- as.Date(combined_data$date, format = "%Y-%m-%d")

# Year-month label used to group the box plot.
combined_data$month <- format(combined_data$date, "%Y-%m")

# Box plot of temperature per month; x labels are rotated for readability.
ggplot(combined_data, aes(x = month, y = 氣溫..., group = month)) +
  geom_boxplot() +
  labs(title = "每月氣溫箱型圖", x = "月份", y = "氣溫 (°C)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Daily mean, maximum and minimum temperature.
daily_temp <- combined_data %>%
  group_by(date) %>%
  summarise(
    AvgTemp = mean(氣溫..., na.rm = TRUE),
    MaxTemp = max(氣溫..., na.rm = TRUE),
    MinTemp = min(氣溫..., na.rm = TRUE)
  )

# Drop rows that contain NA values.
daily_temp <- daily_temp %>%
  filter(!is.na(AvgTemp) & !is.na(MaxTemp) & !is.na(MinTemp))

# Daily mean as a line, with a ribbon spanning the daily min/max.
ggplot(daily_temp, aes(x = date)) +
  geom_line(aes(y = AvgTemp), color = "blue") +
  geom_ribbon(aes(ymin = MinTemp, ymax = MaxTemp), alpha = 0.2) +
  labs(title = "每日氣溫平均值與最大/最小值區間", x = "日期", y = "氣溫 (°C)") +
  theme_minimal()

# Wind rose with openair ----

# Install openair if it is missing, then load it. library() is used for
# loading so that a failed install stops the script with an error.
if (!requireNamespace("openair", quietly = TRUE)) {
  install.packages("openair")
}
library(openair)

# Make sure wind speed and wind direction are numeric.
combined_data$風速.m.s. <- as.numeric(combined_data$風速.m.s.)
combined_data$風向.360degree. <- as.numeric(combined_data$風向.360degree.)

windRose(combined_data, ws = "風速.m.s.", wd = "風向.360degree.")

# Wind rose with ggplot2 ----

# The 16 compass points, clockwise from north. Also used as the x-axis limits
# of the second plot, so the direction labels below must match these exactly.
windD <- c(
  "N", "NNE", "NE", "ENE", "E", "ESE", "SE", "SSE",
  "S", "SSW", "SW", "WSW", "W", "WNW", "NW", "NNW"
)

data3 <- combined_data

# Bin direction into 22.5-degree sectors centred on the compass points.
# North straddles 0/360, so there are 17 bins: the first and the last both
# map to "N" through the modulo below.
dir_breaks <- c(0, seq(11.25, 348.75, by = 22.5), 360)
dir_bin <- cut(
  combined_data$風向.360degree.,
  breaks = dir_breaks,
  labels = FALSE,
  include.lowest = TRUE
)
data3$windDirection_N <- factor(
  windD[(dir_bin - 1) %% length(windD) + 1],
  levels = windD
)

# Bin wind speed; right = FALSE makes the intervals closed on the left,
# e.g. [0, 2.1), [2.1, 4.1), ...
data3$Windspeed_N <- cut(
  combined_data$風速.m.s.,
  breaks = c(0, 2.1, 4.1, 6.1, Inf),
  labels = c("0~2", "2.1~4", "4.1~6", ">6.1"),
  right = FALSE
)

# Numeric wind speed. Read it from the source column: `data3$Windspeed` does
# not exist yet and would partially match the factor `Windspeed_N`.
data3$Windspeed <- as.numeric(data3$風速.m.s.)

# Stacked counts per direction on a polar axis.
ggplot(data = data3, aes(x = windDirection_N, fill = Windspeed_N)) +
  geom_bar(position = position_stack(reverse = TRUE)) +
  theme_bw() +
  scale_fill_discrete(
    guide = guide_legend(reverse = TRUE),
    name = "Wind Speed (m/s)"
  ) +
  coord_polar(start = -0.3) +
  xlab("")

# NOTE(review): FourSeasons2 is not used in the code visible here; kept in
# case a later part of the script relies on it.
FourSeasons2 <- factor(c("冬", "春", "夏", "秋", "冬"))
lab_max <- 30
plottitle <- "風向與風速分佈圖"

# Radial positions (in percent) at which the "%" guide labels are drawn.
lab_breaks <- scales::extended_breaks(5)(range(0, as.numeric(lab_max)))

# Same wind rose expressed as a percentage of all observations. All 16
# sectors are kept on the axis even when a direction has no data.
ggplot(data = data3, aes(x = windDirection_N, fill = Windspeed_N)) +
  geom_bar(
    aes(y = after_stat(count / sum(count) * 100)),
    position = position_stack(reverse = TRUE)
  ) +
  theme_bw() +
  scale_x_discrete(drop = FALSE, limits = windD) +
  scale_fill_discrete(
    guide = guide_legend(reverse = TRUE),
    name = "Wind Speed (m/s)"
  ) +
  coord_polar(start = -0.2) +
  annotate(
    "text",
    x = "NE",
    y = lab_breaks,
    label = paste(lab_breaks, "%")
  ) +
  xlab("") +
  ggtitle(plottitle) +
  ylab("%") +
  theme(
    legend.text = element_text(family = "Times New Roman"),
    legend.title = element_text(family = "Times New Roman"),
    axis.title.y = element_text(family = "Times New Roman"),
    title = element_text(family = "DFKai-SB"),
    axis.title = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank()
  )