# Python 學習筆記--網路爬蟲

## webbrowser模組:呼叫瀏覽器連到指定網址

### 根據指定網址開啟網頁

```python=
import webbrowser

# Open the page in the system default browser
webbrowser.open('http://www.yahoo.com.tw')
```

![](https://hackmd.io/_uploads/ryk0ipZUh.png)

### 根據關鍵字找到google地圖所在位置

```python=
import webbrowser
from urllib.parse import quote

add = input('請輸入要找尋的地點:')
# Percent-encode the place name so spaces, '#', '?' etc. stay inside the path
webbrowser.open('http://www.google.com/maps/search/' + quote(add))
```

![](https://hackmd.io/_uploads/S1OPnTZU2.png)

### 透過輸入關鍵字, 建立list,用google搜尋開啟多個網頁

```python=
import webbrowser
from urllib.parse import quote_plus

locations = []
keywords = input('請輸入要搜尋的關鍵字')
# An empty answer ends the input loop
while keywords != '':
    locations.append(keywords)
    keywords = input('請輸入要搜尋的關鍵字')

# One browser tab per collected keyword
for i in locations:
    # quote_plus keeps spaces, '&' and '#' inside the q= value
    webbrowser.open('https://www.google.com.tw/search?q=' + quote_plus(i))
```

![](https://hackmd.io/_uploads/SyYtsp-Ln.png)

## 動態網路爬蟲:Selenium

### 使用selenium 去搜尋找到資料平台開放的網頁

```python=
import time

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Raw string: '\d' and '\c' are not valid escape sequences in a normal literal
driverPath = r'C:\driver\chromedriver.exe'
# Selenium 4 receives the chromedriver location through a Service object
driver = webdriver.Chrome(service=Service(driverPath))
driver.get('https://data.gov.tw/')

# Type the query into the search box
insert = driver.find_element(By.ID, 'searchInput')
insert.click()
insert.send_keys('出生率+高雄')
time.sleep(0.5)

# Click the input element located by its absolute XPath
website = driver.find_element(By.XPATH, '/html/body/div/div/div/main/div/div/div[2]/div[1]/div/input')
website.click()

# Wait (up to 20 s) for a link whose text contains '高雄市', then open it
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.PARTIAL_LINK_TEXT, '高雄市'))).click()
# Disabled step kept from the original notes:
# WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.PARTIAL_LINK_TEXT, 'CSV'))).click()
time.sleep(3)

# By.CLASS_NAME accepts a single class only; a CSS selector matches all three classes
website = driver.find_element(By.CSS_SELECTOR, '.svg-inline--fa.fa-download.fa-w-16')
website.click()
```

![](https://hackmd.io/_uploads/SkkNsTb83.png)

## 動態網路爬蟲:

### 使用pandas處理數據

```python=
# Detect if fonts exist or download it
import os

if "NotosansTC.zip" not in os.listdir():
    # The URL is quoted so the shell does not treat '?' as a glob character
    ! wget "https://fonts.google.com/download?family=Noto%20Sans%20TC" -O NotosansTC.zip
if "NotoSansTC-Regular.otf" not in os.listdir():
    ! unzip -o NotosansTC.zip
! ls

# Import fonts with fontManager
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.font_manager import fontManager

# Register the font once and make it the default so the Chinese labels render
fontManager.addfont("./NotoSansTC-Regular.otf")
matplotlib.rc('font', family='Noto Sans TC')

# NOTE(review): `df` is not created anywhere in these notes. It must already
# hold a table with the columns used below (county, sitename,
# datacreationdate, pm2.5, pm10, aqi, o3) - confirm where it is loaded.
df2 = df[df.county == '高雄市']

# Sort each station by timestamp; .copy() gives an independent frame so the
# column assignment below does not write into a slice of df2
df3 = df2[df2.sitename == '前鎮'].sort_values('datacreationdate', ascending=True).copy()
df4 = df2[df2.sitename == '小港'].sort_values('datacreationdate', ascending=True).copy()

for station in (df3, df4):
    # info() prints its own report and returns None, so it is not wrapped in print()
    station.info()
    print(station.index)
    print(len(station))
    print(station.datacreationdate)
    # Keep only the hour ('00'..'23') of each timestamp for the x axis
    station['datacreationdate'] = pd.to_datetime(station['datacreationdate']).dt.strftime('%H')
    print(station.datacreationdate)

# (column, line colour for 前鎮, line colour for 小港), one entry per panel
panels = [
    ('pm2.5', 'purple', 'gray'),
    ('pm10', '#9e1c88', '#db1835'),
    ('aqi', '#1822db', '#940d2f'),
    ('o3', '#a89213', '#136fa8'),
]

plt.figure(figsize=[10, 8])
# Draw the four indicators on a 2x2 grid, both stations on every panel
for position, (column, color_qianzhen, color_xiaogang) in enumerate(panels, start=1):
    plt.subplot(2, 2, position)
    plt.plot(df3.datacreationdate, df3[column], label='前鎮區', marker='X', color=color_qianzhen)
    plt.plot(df4.datacreationdate, df4[column], label='小港區', marker='s', color=color_xiaogang)
    plt.title(f'高雄市前鎮區vs小港區{column}指標')
    plt.xlabel('時刻')
    plt.legend()
# Extra vertical spacing so the titles of the lower row do not overlap the upper row
plt.subplots_adjust(hspace=0.5)
```

![](https://hackmd.io/_uploads/r1BQJ8JO2.png)