# Python 學習筆記--網路爬蟲
## webbrowser模組:呼叫瀏覽器連到指定網址
### 根據指定網址開啟網頁
```python=
import webbrowser

# Open the Yahoo Taiwan home page in the system's default browser.
# (The module name is `webbrowser`; a misspelled name raises NameError.)
webbrowser.open('http://www.yahoo.com.tw')
```

### 根據關鍵字在 Google 地圖上搜尋地點
```python=
import webbrowser
from urllib.parse import quote

# Ask the user for a place name and open a Google Maps search for it.
place = input('請輸入要找尋的地點:')
# Percent-encode the input so characters such as spaces, '/', '?' and '#'
# stay part of the search term instead of changing the URL structure.
webbrowser.open('http://www.google.com/maps/search/' + quote(place, safe=''))
```

### 輸入多個關鍵字建立 list,再用 Google 搜尋逐一開啟網頁
```python=
import webbrowser
from urllib.parse import quote_plus

# Collect search keywords until the user submits an empty line.
locations = []
while True:
    keyword = input('請輸入要搜尋的關鍵字')
    if keyword == '':
        break
    locations.append(keyword)

# Open one Google search tab per keyword.
for keyword in locations:
    # quote_plus() encodes the keyword for use as a query-string value, so
    # '&', '#', '+' and spaces inside a keyword do not break the URL.
    webbrowser.open('https://www.google.com.tw/search?q=' + quote_plus(keyword))
```

## 動態網路爬蟲:Selenium
### 使用 Selenium 在政府資料開放平臺搜尋並開啟資料集網頁
```python=
import time

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Raw string so the backslashes in the Windows path are never read as
# escape sequences.
driverPath = r'C:\driver\chromedriver.exe'
# Selenium 4 takes the chromedriver location through a Service object; the
# first positional argument of webdriver.Chrome is no longer the driver path.
driver = webdriver.Chrome(service=Service(driverPath))
driver.get('https://data.gov.tw/')

# Type the query into the search box on the landing page.
insert = driver.find_element(By.ID, 'searchInput')
insert.click()
insert.send_keys('出生率+高雄')
time.sleep(0.5)

# Click the input element located by this absolute XPath
# (presumably the search submit control — TODO confirm against the page).
website = driver.find_element(
    By.XPATH,
    '/html/body/div/div/div/main/div/div/div[2]/div[1]/div/input',
)
website.click()

# Wait up to 20 s for a link whose text contains '高雄市', then open it.
WebDriverWait(driver, 20).until(
    EC.element_to_be_clickable((By.PARTIAL_LINK_TEXT, '高雄市'))
).click()
# Alternative kept from the original notes: click the link containing 'CSV'.
# WebDriverWait(driver, 20).until(
#     EC.element_to_be_clickable((By.PARTIAL_LINK_TEXT, 'CSV'))
# ).click()
time.sleep(3)

# By.CLASS_NAME accepts a single class name only; an element carrying all
# three classes has to be matched with a CSS selector instead.
website = driver.find_element(
    By.CSS_SELECTOR, '.svg-inline--fa.fa-download.fa-w-16'
)
website.click()
```

## 資料處理與視覺化:pandas 與 Matplotlib
### 使用pandas處理數據
```python=
# Detect if fonts exist or download it
import os
if "NotosansTC.zip" not in os.listdir():
! wget https://fonts.google.com/download?family=Noto%20Sans%20TC -O NotosansTC.zip
if "NotoSansTC-Regular.otf" not in os.listdir():
! unzip -o NotosansTC.zip
! ls
# Import fonts with fontManager
from matplotlib.font_manager import fontManager
import matplotlib
fontManager.addfont("./NotoSansTC-Regular.otf")
matplotlib.rc('font', family='Noto Sans TC')
fontManager.addfont("./NotoSansTC-Regular.otf")
matplotlib.rc('font', family='Noto Sans TC')
df2=df[df.county=='高雄市']
df3=df2[df2.sitename=='前鎮']
df4=df2[df2.sitename=='小港']
#print(df3)
df3.sort_values('datacreationdate',ascending=True,inplace=True)
df4.sort_values('datacreationdate',ascending=True,inplace=True)
#print(df3)
print(df3.info())
print(df3.index)
print(len(df3))
print(df3.datacreationdate)
df3.datacreationdate=pd.to_datetime(df3.datacreationdate)
df3.datacreationdate=df3.datacreationdate.dt.strftime('%H')
print(df3.datacreationdate)
print(df4.info())
print(df4.index)
print(len(df4))
print(df4.datacreationdate)
df4.datacreationdate=pd.to_datetime(df4.datacreationdate)
df4.datacreationdate=df4.datacreationdate.dt.strftime('%H')
print(df4.datacreationdate)
x1=df3.datacreationdate
x2=df4.datacreationdate
plt.figure(figsize=[10,8])
plt.subplot(2,2,1)
plt.subplots_adjust(hspace=0.5)
y1=df3['pm2.5']
y2=df4['pm2.5']
plt.plot(x1,y1,label='前鎮區',marker='X',color='purple')
plt.plot(x2,y2,label='小港區',marker='s',color='gray')
plt.title('高雄市前鎮區vs小港區pm2.5指標')
plt.xlabel('時刻')
plt.legend()
x1=df3.datacreationdate
x2=df4.datacreationdate
plt.subplot(2,2,2)
plt.subplots_adjust(hspace=0.5)
y3=df3['pm10']
y4=df4['pm10']
plt.plot(x1,y3,label='前鎮區',marker='X',color='#9e1c88')
plt.plot(x2,y4,label='小港區',marker='s',color='#db1835')
plt.title('高雄市前鎮區vs小港區pm10指標')
plt.xlabel('時刻')
plt.legend()
x1=df3.datacreationdate
x2=df4.datacreationdate
plt.subplot(2,2,3)
plt.subplots_adjust(hspace=0.5)
y5=df3['aqi']
y6=df4['aqi']
plt.plot(x1,y5,label='前鎮區',marker='X',color='#1822db')
plt.plot(x2,y6,label='小港區',marker='s',color='#940d2f')
plt.title('高雄市前鎮區vs小港區aqi指標')
plt.xlabel('時刻')
plt.legend()
x1=df3.datacreationdate
x2=df4.datacreationdate
plt.subplot(2,2,4)
plt.subplots_adjust(hspace=0.5)
y7=df3['o3']
y8=df4['o3']
plt.plot(x1,y7,label='前鎮區',marker='X',color='#a89213')
plt.plot(x2,y8,label='小港區',marker='s',color='#136fa8')
plt.title('高雄市前鎮區vs小港區o3指標')
plt.xlabel('時刻')
plt.legend()
```
