# Python 學習筆記--網路爬蟲

## webbrowser 模組:呼叫瀏覽器連到指定網址

### 根據指定網址開啟網頁

```python=
import webbrowser

# Open the given URL in the system's default browser.
webbrowser.open('http://www.yahoo.com.tw')
```

![](https://hackmd.io/_uploads/ryG5ipWL2.png)

### 根據關鍵字找到google地圖所在位置

```py=
import webbrowser
from urllib.parse import quote

add = input('請輸入要找尋的地點:')
# Percent-encode the place name so spaces, '/', '?' and '#' typed by the user
# stay part of the search term instead of changing the URL structure.
webbrowser.open('http://www.google.com/maps/search/' + quote(add, safe=''))
```

![](https://hackmd.io/_uploads/ByXQhpWL2.png)

### 透過輸入關鍵字,建立list,用google搜尋開啟多個網頁

```python=
import webbrowser
from urllib.parse import quote_plus

locations = []
keywords = input('請輸入要搜尋的關鍵字:')
# Keep collecting keywords until the user submits an empty line.
while keywords != '':
    locations.append(keywords)
    keywords = input('請輸入要搜尋的關鍵字:')

# Open one Google search tab per keyword; quote_plus keeps '&', '#' and spaces
# inside the q= parameter instead of breaking the query string.
for i in locations:
    webbrowser.open('https://www.google.com.tw/search?q=' + quote_plus(i))
```

![](https://hackmd.io/_uploads/BkhNopbUn.png)

## 動態網頁爬蟲: Selenium

### 使用 selenium 去搜尋找到資料開放平台的網頁

```python=
import time

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Raw string: in a normal string '\d' and '\c' are invalid escape sequences.
driverPath = r'C:\driver\chromedriver.exe'
# Selenium 4 receives the chromedriver location through a Service object.
driver = webdriver.Chrome(service=Service(driverPath))
try:
    driver.get('https://data.gov.tw/')

    # Type the query into the site's search box.
    insert = driver.find_element(By.ID, 'searchInput')
    insert.click()
    insert.send_keys('出生率+高雄')
    time.sleep(0.5)

    # Click the search button next to the input.
    website = driver.find_element(By.XPATH, '/html/body/div[1]/div/div/main/div/div[2]/div[1]/div/label/button')
    website.click()

    # element_to_be_clickable takes ONE locator tuple (by, value); wait up to
    # 20 seconds for the first link containing '高雄市', then click it.
    WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.PARTIAL_LINK_TEXT, '高雄市'))
    ).click()
    time.sleep(3)
finally:
    # Always close the browser and stop the chromedriver process.
    driver.quit()
```

![](https://hackmd.io/_uploads/Hk2HwTbUn.png)

```python=
# Detect if fonts exist or download it
# (the "!" lines are IPython/Jupyter shell escapes, not plain Python)
import os

if "NotosansTC.zip" not in os.listdir():
    # Quote the URL so the shell does not interpret the '?' in it.
    ! wget "https://fonts.google.com/download?family=Noto%20Sans%20TC" -O NotosansTC.zip
if "NotoSansTC-Regular.otf" not in os.listdir():
    ! unzip -o NotosansTC.zip
! ls

# Import fonts with fontManager
from matplotlib.font_manager import fontManager
import matplotlib

# Register the font file and make it the default so CJK labels render.
fontManager.addfont("./NotoSansTC-Regular.otf")
matplotlib.rc('font', family='Noto Sans TC')
```

```python=
import requests
import csv
import pandas as pd
import matplotlib.pyplot as plt

# NOTE(review): the API key is embedded in the URL; replace it with your own key.
url='https://data.epa.gov.tw/api/v2/aqx_p_488?api_key=e8dd42e6-9b8b-43f8-991e-b3dee723a52d&limit=1000&sort=datacreationdate%20desc&format=CSV'
# pandas downloads and parses the CSV response directly from the URL.
df = pd.read_csv(url)
#print('------------------------------------------------')
#print(df.info())
#print('------------------------------------------------')
#print(df.index)
#print('------------------------------------------------')
#print(df.columns)
#print('------------------------------------------------')
#print(df)

# Keep only the rows for Kaohsiung City.
df2 = df[df.county == '高雄市']
#print(df2)

# One DataFrame per monitoring station. .copy() makes each an independent
# frame, so the in-place sort and the column assignments below do not trigger
# SettingWithCopyWarning.
df3 = df2[df2.sitename == '前鎮'].copy()
df01 = df2[df2.sitename == '鳳山'].copy()
# The API returns newest rows first; sort oldest-to-newest for plotting.
df3.sort_values('datacreationdate', ascending=True, inplace=True)
df01.sort_values('datacreationdate', ascending=True, inplace=True)

print(df3)
print('------------------------------------------------')
print(df01)
print('------------------------------------------------')
print(df3.info())
print('------------------------------------------------')
print(df01.info())
print('------------------------------------------------')
print(df3.index)
print('------------------------------------------------')
print(df01.index)
print('------------------------------------------------')
print(len(df3))
print('------------------------------------------------')
print(len(df01))
print('------------------------------------------------')
#print(df3.datacreationdate)

# Convert the timestamp column to a datetime dtype.
df3['datacreationdate'] = pd.to_datetime(df3['datacreationdate'])
df01['datacreationdate'] = pd.to_datetime(df01['datacreationdate'])
# Drop the date part and keep only the hour (as a two-digit string).
df3['datacreationdate'] = df3['datacreationdate'].dt.strftime('%H')
df01['datacreationdate'] = df01['datacreationdate'].dt.strftime('%H')
print(df3.datacreationdate)
print('------------------------------------------------')
print(df01.datacreationdate)
# Hour labels of the two stations; shared x axis data for every panel.
hours_qianzhen = df3.datacreationdate
hours_fengshan = df01.datacreationdate

# One entry per panel, in grid order: (column to plot, colour of the first
# station's line, colour of the second station's line). None means "use
# matplotlib's default colour cycle".
panels = [
    ('pm2.5', '#05f5e9', '#9d05f5'),
    ('o3', None, None),
    ('aqi', 'dodgerblue', 'deeppink'),
    ('pm10', None, None),
]

plt.figure(figsize=[10,8])
for position, (column, first_colour, second_colour) in enumerate(panels, start=1):
    # Select the next cell of the 2x2 grid.
    plt.subplot(2, 2, position)
    # Only pass color= when the panel defines one.
    first_style = {'color': first_colour} if first_colour else {}
    second_style = {'color': second_colour} if second_colour else {}
    plt.plot(hours_qianzhen, df3[column], label='前鎮區', marker='X', **first_style)
    plt.plot(hours_fengshan, df01[column], label='鳳山區', marker='X', **second_style)
    plt.title(f"高雄市前鎮區VS鳳山區 {column} 空氣品質指標")
    plt.xlabel("時刻")
    plt.legend()

# Extra vertical spacing so titles do not collide with the x labels above them.
plt.subplots_adjust(hspace=0.5)
plt.show()
```

![](https://hackmd.io/_uploads/Bkw2vOaw2.png)