# Python 學習筆記--網路爬蟲
## webbrowser 模組:呼叫瀏覽器連到指定網址
### 根據指定網址開啟網頁
```python=
# Open a fixed web page in the system's default browser.
import webbrowser

target_url = 'http://www.yahoo.com.tw'
webbrowser.open(target_url)
```

### 根據關鍵字在 Google 地圖上搜尋地點
```py=
# Ask for a place name and open the matching Google Maps search in the browser.
import webbrowser
from urllib.parse import quote

add = input('請輸入要找尋的地點:')
# Percent-encode the input (including '/') so that spaces, '#', '?' and
# similar characters stay part of the search term instead of altering the URL.
webbrowser.open('http://www.google.com/maps/search/' + quote(add, safe=''))
```

### 透過輸入關鍵字,建立list,用google搜尋開啟多個網頁
```python=
# Collect search keywords until an empty line is entered, then open one
# Google search page per collected keyword.
import webbrowser
from urllib.parse import quote_plus

locations = []
keywords = input('請輸入要搜尋的關鍵字:')
while keywords != '':
    locations.append(keywords)
    keywords = input('請輸入要搜尋的關鍵字:')
for i in locations:
    # quote_plus escapes spaces, '&', '#', ... so the whole keyword stays
    # inside the q= parameter instead of being cut off or misparsed.
    webbrowser.open('https://www.google.com.tw/search?q=' + quote_plus(i))
```

## 動態網頁爬蟲: Selenium
### 使用 Selenium 在政府資料開放平臺搜尋並開啟資料集網頁
```python=
# Drive Chrome with Selenium: open data.gov.tw, type a query into the search
# box, submit it, then click the first clickable link whose text contains
# the target city name.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Raw string: the backslashes are Windows path separators, not escape sequences.
driverPath = r'C:\driver\chromedriver.exe'
# Selenium 4 receives the driver executable through a Service object; the
# positional path argument is no longer accepted by current releases.
driver = webdriver.Chrome(service=Service(driverPath))
driver.get('https://data.gov.tw/')
insert = driver.find_element(By.ID, 'searchInput')
insert.click()
insert.send_keys('出生率+高雄')
time.sleep(0.5)
website = driver.find_element(By.XPATH, '/html/body/div[1]/div/div/main/div/div[2]/div[1]/div/label/button')
website.click()
# element_to_be_clickable takes ONE argument: a (By, value) locator tuple.
# Passing the two parts as separate arguments raises TypeError.
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.PARTIAL_LINK_TEXT, '高雄市'))).click()
time.sleep(3)
```

```python=
# Detect if fonts exist or download it
import os
if "NotosansTC.zip" not in os.listdir():
! wget https://fonts.google.com/download?family=Noto%20Sans%20TC -O NotosansTC.zip
if "NotoSansTC-Regular.otf" not in os.listdir():
! unzip -o NotosansTC.zip
! ls
# Import fonts with fontManager
from matplotlib.font_manager import fontManager
import matplotlib
fontManager.addfont("./NotoSansTC-Regular.otf")
matplotlib.rc('font', family='Noto Sans TC')
```
```python=
# Download recent air-quality records as CSV, keep the two Kaohsiung stations
# of interest (前鎮 and 鳳山), print a summary of each, and reduce the
# timestamp column to the hour of day for use as the plot x-axis.
# `numpy.NaN` was removed in NumPy 2.0; importing `nan` under the old name
# keeps `NaN` available on both old and new NumPy versions.
from numpy import nan as NaN
import requests
import csv
import pandas as pd
import matplotlib.pyplot as plt

separator = '------------------------------------------------'

url='https://data.epa.gov.tw/api/v2/aqx_p_488?api_key=e8dd42e6-9b8b-43f8-991e-b3dee723a52d&limit=1000&sort=datacreationdate%20desc&format=CSV'
df = pd.read_csv(url)
# For a first look at the raw data, inspect df.info(), df.index, df.columns
# or print df itself.

# Keep only the rows for Kaohsiung City.
df2 = df[df.county == '高雄市']

# One frame per station, sorted oldest-first. The explicit .copy() makes each
# frame independent of df2, so the column updates below do not trigger
# pandas' SettingWithCopyWarning.
df3 = df2[df2.sitename == '前鎮'].sort_values('datacreationdate', ascending=True).copy()
df01 = df2[df2.sitename == '鳳山'].sort_values('datacreationdate', ascending=True).copy()

print(df3)
print(separator)
print(df01)
print(separator)
# DataFrame.info() prints its report itself and returns None, so it is called
# directly instead of being wrapped in print() (which would also print "None").
df3.info()
print(separator)
df01.info()
print(separator)
print(df3.index)
print(separator)
print(df01.index)
print(separator)
print(len(df3))
print(separator)
print(len(df01))
print(separator)

# Parse the timestamp strings, then keep only the two-digit hour ('%H').
df3['datacreationdate'] = pd.to_datetime(df3['datacreationdate']).dt.strftime('%H')
df01['datacreationdate'] = pd.to_datetime(df01['datacreationdate']).dt.strftime('%H')
print(df3.datacreationdate)
print(separator)
print(df01.datacreationdate)

# Hour-of-day series used as the x values for each station.
x1 = df3['datacreationdate']
x2 = df01['datacreationdate']
# Draw a 2x2 grid of line charts, one per measurement column, comparing the
# 前鎮 station (df3, x1) with the 鳳山 station (df01, x2).
# Each entry: (column name, extra line style for 前鎮, extra line style for 鳳山).
# An empty dict means "use matplotlib's default colour cycle".
panels = [
    ('pm2.5', {'color': '#05f5e9'}, {'color': '#9d05f5'}),
    ('o3', {}, {}),
    ('aqi', {'color': 'dodgerblue'}, {'color': 'deeppink'}),
    ('pm10', {}, {}),
]

plt.figure(figsize=[10,8])
for slot, (column, first_style, second_style) in enumerate(panels, start=1):
    plt.subplot(2, 2, slot)
    # Extra vertical spacing so titles do not collide with the x labels above.
    plt.subplots_adjust(hspace=0.5)
    plt.plot(x1, df3[column], label='前鎮區', marker='X', **first_style)
    plt.plot(x2, df01[column], label='鳳山區', marker='X', **second_style)
    plt.title(f"高雄市前鎮區VS鳳山區 {column} 空氣品質指標")
    plt.xlabel("時刻")
    plt.legend()
plt.show()
```
