# 2023-10-14 Python進階實作班 上課記錄
## HTML與CSS基礎
```html=
<!-- Embedded stylesheet demonstrating class and element selectors. -->
<style>
/* Class selector: applies to every element whose class list contains "blue-color". */
.blue-color {
color:blue;
font-size: 2em;
}
/* Element (type) selector: applies to every <p>. */
p {
background-color: yellow;
}
/* Class selector for class="hi". Note: the <h1> below carries id="hi", not class="hi",
   so this rule does not match it (an id selector would be written #hi). */
.hi {
background-color: green;
}
</style>
<!-- Heading with an id attribute and an inline style. -->
<h1 id="hi" style="color:blue;">我的網站</h1>
<!-- Paragraph: gets the yellow background from the p rule plus an inline text color. -->
<p style="color:blue;">段落段落段落段落段落段落段落</p>
<!-- Hyperlink. -->
<a href="http://www.aaronlife.com">前往我的部落格</a>
<!-- Unordered list; the second item carries two classes, the third carries one. -->
<ul>
<li>清單</li>
<li class="blue-color hi">清單</li>
<li class="blue-color">清單</li>
</ul>
<!-- <br /> forces a line break between the two text fragments. -->
123<br />
456
```
## 靜態爬蟲範例
```python=
from bs4 import BeautifulSoup
# Sample HTML document used as the parser input for the examples below.
html_doc = """
<html>
<head>
<title>我的網站</title>
<style>
.blue-color {
color:blue;
font-size: 2em;
}
p {
background-color: yellow;
}
#hi {
background-color: green;
}
</style>
</head>
<body>
<h1 id="hi" style="color:blue;">我的網站</h1>
<p style="color:blue;">段落段落段落段落段落<br/>段落段落</p>
<p>第二段落</p>
<a href="http://www.aaronlife.com">前往我的部落格</a>
<ul>
<li>清單1</li>
<li class="blue-color hi">清單2</li>
<li class="blue-color">清單3</li>
</ul>
123<br />
456
</body>
</html>
"""

# Build the soup object from the page content using Python's built-in parser.
soup = BeautifulSoup(html_doc, 'html.parser')

# Pretty-print the parsed document.
print(soup.prettify())

# Show the <title> tag, its text, and the same tag reached via the full path.
print(soup.title)
print(soup.title.string)
print(soup.html.head.title)

# Find every <p> tag in the page.
all_p = soup.find_all('p')
print(all_p)
for p in all_p:
    # .string is None when a tag has more than one child (the first <p>
    # contains a <br/>), whereas getText() joins all nested text.
    print(p.string)
    print(p.getText())

# Find the first <li> tag.
one_li = soup.find('li')
print(one_li.string)

# Fetch tags through the CSS id selector "#hi" (select returns a list).
selector_hi = soup.select('#hi')
print(selector_hi)

# select_one returns only the first element matching the class selector.
selector_bluecolor = soup.select_one('.blue-color')
print(selector_bluecolor)
```
## 爬取台灣銀行網頁
```python=
from bs4 import BeautifulSoup
import requests
# URL of the Bank of Taiwan exchange-rate page.
url = 'https://rate.bot.com.tw/xrt?Lang=zh-TW'

# Download the page. The timeout keeps the script from hanging forever
# if the server never answers.
response = requests.get(url, timeout=10)

# Show the HTTP status code of the request.
print(response.status_code)

# Only parse when the page was fetched successfully.
if response.status_code == 200:
    soup = BeautifulSoup(response.text, 'html.parser')

    # Collect every <tr> in the first <tbody>.
    # NOTE(review): presumably one row per currency - confirm against the live page.
    all_tr = soup.tbody.find_all('tr')
    for tr in all_tr:
        # Currency name shown in the first cell of the row.
        money_type = tr.td.div.select_one('.print_show').string.strip()
        if '日圓' in money_type:
            tds = tr.find_all('td')
            print('日圓最新匯率:')
            print(f' 現金買入: {tds[1].string.strip()}')
            print(f' 現金賣出: {tds[2].string.strip()}')
            print(f' 即期買入: {tds[3].string.strip()}')
            print(f' 即期賣出: {tds[4].string.strip()}')
```
## 爬取Google Trends
```python=
import requests
import json
import datetime
def google_trend(trend_date):
    """Print Google's daily trending searches for Taiwan for one date.

    Args:
        trend_date: Date string appended to the ``ed`` query parameter.
            The caller in this file formats it as ``%Y%m%d``.

    Prints the formatted date followed by one ``query: traffic`` line per
    trending search. Prints nothing when the HTTP status is not 200.
    """
    url = (
        'https://trends.google.com.tw/trends/api/dailytrends?hl=zh-TW&tz=-480&ed='
        + trend_date
        + '&geo=TW&hl=zh-TW&ns=15'
    )
    # The timeout keeps the script from hanging if the server never answers.
    response = requests.get(url, timeout=10)
    if response.status_code == 200:
        # Strip the extra leading characters Google puts in front of the JSON.
        data = response.text.replace(')]}\',', '')
        # Convert the JSON text into Python data structures.
        trend_data = json.loads(data)
        first_day = trend_data['default']['trendingSearchesDays'][0]
        # Date as formatted by the service.
        print(first_day['formattedDate'])
        # Keyword and its search volume.
        for k in first_day['trendingSearches']:
            print(f"{k['title']['query']}: {k['formattedTraffic']}")
# Number of past days to query.
days = 7

# Today's date and time (local).
today_date = datetime.datetime.today()

# Walk from `days` days ago up to yesterday, oldest first.
for d in range(days, 0, -1):
    # Date arithmetic: step back d days from today.
    d_date = today_date - datetime.timedelta(days=d)
    day_str = d_date.strftime('%Y%m%d')
    google_trend(day_str)
    # Blank line between the output of consecutive days.
    print()
```
## Selenium
```python=
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
# Start a Chrome session through Selenium.
driver = webdriver.Chrome()
try:
    # Implicit wait: element lookups keep retrying for up to 10 seconds
    # before failing (this governs locating elements, not page loading).
    driver.implicitly_wait(10)

    # Open the exchange-rate page.
    driver.get('https://rate.bot.com.tw/xrt?Lang=zh-TW')

    # Locate the "download Excel (CSV) file" link by its visible text.
    download_csv = driver.find_element(By.LINK_TEXT, '下載 Excel (CSV) 檔')

    # Show the link's href attribute.
    print(download_csv.get_attribute('href'))

    # Pause 2 seconds, then click the link.
    time.sleep(2)
    download_csv.click()

    # Pause 10 seconds - presumably to give the download time to finish.
    time.sleep(10)
finally:
    # Always shut the browser and driver process down, even on errors.
    driver.quit()
```
## 爬取Google搜尋結果
```python=
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time
# Start a Chrome session through Selenium.
driver = webdriver.Chrome()
try:
    # Implicit wait: element lookups keep retrying for up to 10 seconds
    # before failing (this governs locating elements, not page loading).
    driver.implicitly_wait(10)

    # Open Google.
    driver.get('https://www.google.com')

    # Locate the search box by its name attribute.
    search_input = driver.find_element(By.NAME, 'q')

    # Type the search text.
    search_input.send_keys('茶碗蒸')
    time.sleep(2)

    # Submit the search by sending ENTER.
    search_input.send_keys(Keys.ENTER)
    time.sleep(2)

    # Locate the result titles and the result links.
    # NOTE(review): the class name and jsname value are tied to Google's
    # current markup and may stop matching - verify before relying on them.
    items = driver.find_elements(By.CLASS_NAME, 'LC20lb')
    addr = driver.find_elements(By.CSS_SELECTOR, 'a[jsname="UWckNb"]')

    # Pair titles with links positionally; zip stops at the shorter list.
    for title, link in zip(items, addr):
        print(f'{title.text}\n => {link.get_attribute("href")}\n')
finally:
    # Always shut the browser and driver process down, even on errors.
    driver.quit()
```
## 2023-10-21
#### AI聊天
```python=
import os
import openai
# Read the API key from the environment instead of hard-coding it.
openai.api_key = os.getenv("OPENAI_API_KEY")

# Running total of tokens reported by the API across all requests.
tokens = 0

while True:
    user_input = input('請說話: ')
    # Typing "結束" ("end") leaves the chat loop.
    if user_input == '結束':
        break

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "用繁體中文回答"},
            {"role": "user", "content": user_input}
        ],
        # NOTE(review): three candidate replies are requested but only the
        # first one is printed below - confirm n=3 is intended.
        n=3,
        temperature=1
    )
    print(completion.choices[0].message.content)

    tokens += completion.usage.total_tokens
    # NOTE(review): 0.02 USD per 1000 tokens is a hard-coded rate - verify
    # it against the current pricing for this model.
    print('目前總花費: ', tokens * 0.02 / 1000, '美元')
```
#### 語音轉文字
```python=
import os
import openai
# Read the API key from the environment instead of hard-coding it.
openai.api_key = os.getenv("OPENAI_API_KEY")

# Open the audio file in binary mode; the with-block guarantees the file
# handle is closed once the transcription request has finished.
with open("1.mp3", "rb") as audio_file:
    transcript = openai.Audio.transcribe("whisper-1", audio_file)

# Show the recognized text.
print(transcript.text)
```
> 文字轉語音工具: https://ttsmaker.com/zh-hk
#### 語音聊天
```python=
import os
import openai
import glob
import shutil
# openai.api_key = os.getenv("OPENAI_API_KEY")
# audio_file = open("1.mp3", "rb")
# transcript = openai.Audio.transcribe("whisper-1", audio_file)
# print(transcript.text)
import os
import openai
# Read the API key from the environment instead of hard-coding it.
openai.api_key = os.getenv("OPENAI_API_KEY")

# Running total of tokens reported by the API across all requests.
tokens = 0

while True:
    # The typed text only gates the loop: "結束" ("end") stops it, anything
    # else triggers one round using the audio file.
    user_input = input('請說話: ')
    if user_input == '結束':
        break

    # Copy the downloaded TTS audio file(s) to 1.mp3 in the project folder.
    # When several files match, each overwrites the previous copy.
    for mp3_path in glob.glob('C:/Users/USER/Downloads/ttsmaker*.mp3'):
        print(mp3_path)
        shutil.copyfile(mp3_path, 'C:/Users/USER/Documents/python-0701/1.mp3')

    # NOTE(review): "1.mp3" is opened relative to the working directory -
    # this assumes the script runs from the folder copied into above.
    # The with-block closes the file handle after each transcription.
    with open("1.mp3", "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "用繁體中文回答"},
            {"role": "user", "content": transcript.text}
        ],
        # NOTE(review): three candidate replies are requested but only the
        # first one is printed below - confirm n=3 is intended.
        n=3,
        temperature=1
    )
    print(completion.choices[0].message.content)

    tokens += completion.usage.total_tokens
    # NOTE(review): 0.02 USD per 1000 tokens is a hard-coded rate - verify
    # it against the current pricing for this model.
    print('目前總花費: ', tokens * 0.02 / 1000, '美元')
```