# 2023-10-14 Python進階實作班 上課記錄

## HTML與CSS基礎

```html=
<style>
    .blue-color {
        color:blue;
        font-size: 2em;
    }

    p {
        background-color: yellow;
    }

    .hi {
        background-color: green;
    }
</style>

<h1 id="hi" style="color:blue;">我的網站</h1>
<p style="color:blue;">段落段落段落段落段落段落段落</p>
<a href="http://www.aaronlife.com">前往我的部落格</a>
<ul>
    <li>清單</li>
    <li class="blue-color hi">清單</li>
    <li class="blue-color">清單</li>
</ul>
123<br />
456
```

## 靜態爬蟲範例

```python=
from bs4 import BeautifulSoup

# Sample page parsed locally, so this example needs no network access.
html_doc = """
<html>
    <head>
        <title>我的網站</title>
        <style>
            .blue-color {
                color:blue;
                font-size: 2em;
            }

            p {
                background-color: yellow;
            }

            #hi {
                background-color: green;
            }
        </style>
    </head>
    <body>
        <h1 id="hi" style="color:blue;">我的網站</h1>
        <p style="color:blue;">段落段落段落段落段落<br/>段落段落</p>
        <p>第二段落</p>
        <a href="http://www.aaronlife.com">前往我的部落格</a>
        <ul>
            <li>清單1</li>
            <li class="blue-color hi">清單2</li>
            <li class="blue-color">清單3</li>
        </ul>
        123<br />
        456
    </body>
</html>
"""

# Build the soup object from the page content.
soup = BeautifulSoup(html_doc, 'html.parser')

# Pretty-print the parsed page.
print(soup.prettify())

# Show the page title: the tag itself, its text, and the same tag reached
# by walking down from <html>.
print(soup.title)
print(soup.title.string)
print(soup.html.head.title)

# Find every <p> tag in the page.
all_p = soup.find_all('p')
print(all_p)

for p in all_p:
    # .string is None when a tag has more than one child (the first <p>
    # contains a <br/>), whereas get_text() joins all nested text.
    print(p.string)
    print(p.get_text())

# Find the first <li> tag.
one_li = soup.find('li')
print(one_li.string)

# Fetch tags through the #hi CSS selector (select returns a list).
selector_hi = soup.select('#hi')
print(selector_hi)

# select_one returns only the first tag matching the selector.
selector_bluecolor = soup.select_one('.blue-color')
print(selector_bluecolor)
```

## 爬取台灣銀行網頁

```python=
from bs4 import BeautifulSoup
import requests

# URL of the Bank of Taiwan exchange-rate page.
url = 'https://rate.bot.com.tw/xrt?Lang=zh-TW'

# Download the page with requests. The timeout keeps the script from
# hanging forever if the server never answers.
response = requests.get(url, timeout=10)

# Show the HTTP status of the request.
print(response.status_code)

# The page was fetched successfully.
if response.status_code == 200:
    # Uncomment to dump the raw HTML while debugging:
    # print(response.text)

    # Parse the page.
    soup = BeautifulSoup(response.text, 'html.parser')

    # Collect every <tr> inside the first <tbody>.
    all_tr = soup.tbody.find_all('tr')

    for tr in all_tr:
        # Currency name shown in the first cell of the row.
        money_type = tr.td.div.select_one('.print_show').string.strip()

        if '日圓' in money_type:
            # Cells 1-4 are printed as cash buy / cash sell / spot buy /
            # spot sell, in that order.
            tds = tr.find_all('td')
            print('日圓最新匯率:')
            print(f' 現金買入: {tds[1].string.strip()}')
            print(f' 現金賣出: {tds[2].string.strip()}')
            print(f' 即期買入: {tds[3].string.strip()}')
            print(f' 即期賣出: {tds[4].string.strip()}')
```

## 爬取Google Trend

```python=
import datetime
import json

import requests


def google_trend(trend_date):
    """Print the Google Trends daily searches for Taiwan on one date.

    Args:
        trend_date: Date string formatted as YYYYMMDD (for example
            '20231014'); it is placed in the `ed` query parameter.

    Prints the formatted date followed by one "query: traffic" line per
    trending search. Nothing is printed when the HTTP status is not 200.
    """
    url = (
        'https://trends.google.com.tw/trends/api/dailytrends'
        f'?hl=zh-TW&tz=-480&ed={trend_date}&geo=TW&hl=zh-TW&ns=15'
    )

    # The timeout keeps one stalled request from blocking the whole loop.
    response = requests.get(url, timeout=10)

    if response.status_code != 200:
        return

    # Drop the extra characters Google Trends sends along with the JSON.
    # Only the first occurrence is removed so the payload itself is never
    # altered.
    data = response.text.replace(")]}',", '', 1)

    # Convert the JSON text into Python data structures.
    trend_data = json.loads(data)
    day = trend_data['default']['trendingSearchesDays'][0]

    # Date of the report.
    print(day['formattedDate'])

    # Keyword and search volume of every trending search.
    for search in day['trendingSearches']:
        print(f"{search['title']['query']}: {search['formattedTraffic']}")


days = 7

# Today's date.
today_date = datetime.datetime.today()

# Walk from `days` days ago up to yesterday.
for d in range(days, 0, -1):
    # Date subtraction.
    d_date = today_date - datetime.timedelta(days=d)
    day_str = d_date.strftime('%Y%m%d')

    google_trend(day_str)
    print()
```

## Selenium

```python=
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

# Start the browser driver.
driver = webdriver.Chrome()

try:
    # Wait up to 10 seconds for an element to appear when locating it.
    driver.implicitly_wait(10)

    # Open the page.
    driver.get('https://rate.bot.com.tw/xrt?Lang=zh-TW')

    # Locate the "download Excel (CSV) file" link by its text.
    download_csv = driver.find_element(By.LINK_TEXT, '下載 Excel (CSV) 檔')

    # Show the value of its href attribute.
    print(download_csv.get_attribute('href'))

    # Pause for 2 seconds.
    time.sleep(2)

    # Click the link.
    download_csv.click()

    # Pause for 10 seconds before the browser is closed.
    time.sleep(10)
finally:
    # Always close the browser and stop the driver process.
    driver.quit()
```

## 爬取Google搜尋結果

```python=
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Start the selenium browser driver.
driver = webdriver.Chrome()

try:
    # Wait up to 10 seconds for an element to appear when locating it.
    driver.implicitly_wait(10)

    # Open the page.
    driver.get('https://www.google.com')

    # Locate the search box.
    search_input = driver.find_element(By.NAME, 'q')

    # Type the search text.
    search_input.send_keys('茶碗蒸')
    time.sleep(2)

    # Start the search by sending ENTER.
    search_input.send_keys(Keys.ENTER)
    time.sleep(2)

    # Locate the search results: titles and their links.
    # NOTE(review): these class/jsname values come from Google's generated
    # markup and may change — confirm they still match the page.
    items = driver.find_elements(By.CLASS_NAME, 'LC20lb')
    addr = driver.find_elements(By.CSS_SELECTOR, 'a[jsname="UWckNb"]')

    # Pair each title with the link at the same position.
    for title, link in zip(items, addr):
        print(f'{title.text}\n => {link.get_attribute("href")}\n')
finally:
    # Always close the browser and stop the driver process.
    driver.quit()
```

## 2023-10-21

#### AI聊天

```python=
import os

import openai

# NOTE(review): openai.ChatCompletion is the interface of the pre-1.0
# `openai` package — confirm the installed version before running.
openai.api_key = os.getenv("OPENAI_API_KEY")

# Rate used for the running cost estimate, in USD per 1,000 tokens.
# NOTE(review): confirm this rate against the current price of the model.
USD_PER_1K_TOKENS = 0.02

# Total tokens used by all requests so far.
tokens = 0

while True:
    user_input = input('請說話: ')

    # Typing this word ends the chat.
    if user_input == '結束':
        break

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "用繁體中文回答"},
            {"role": "user", "content": user_input}
        ],
        # Three candidate replies are requested but only the first one is
        # printed below; the usage count presumably covers all three.
        n=3,
        temperature=1
    )

    print(completion.choices[0].message.content)

    tokens += completion.usage.total_tokens
    print('目前總花費: ', tokens * USD_PER_1K_TOKENS / 1000, '美元')
```

#### 語音轉文字

```python=
import os

import openai

openai.api_key = os.getenv("OPENAI_API_KEY")

# The with-block closes the audio file once the transcription is done.
with open("1.mp3", "rb") as audio_file:
    transcript = openai.Audio.transcribe("whisper-1", audio_file)

print(transcript.text)
```

> 文字轉語音工具: https://ttsmaker.com/zh-hk

#### 語音聊天

```python=
import glob
import os
import shutil

import openai

openai.api_key = os.getenv("OPENAI_API_KEY")

# Rate used for the running cost estimate, in USD per 1,000 tokens.
# NOTE(review): confirm this rate against the current price of the model.
USD_PER_1K_TOKENS = 0.02

# The downloaded audio is copied into the current directory under a fixed
# name, which is the same file that is opened for transcription below.
dest_dir = '.'
audio_path = os.path.join(dest_dir, '1.mp3')

# Total tokens used by all chat requests so far.
tokens = 0

while True:
    # The typed text only triggers the next round (or ends the chat); the
    # question itself comes from the audio file.
    user_input = input('請說話: ')

    if user_input == '結束':
        break

    # Copy the downloaded audio file into the current directory.
    # Adjust the Downloads path to match your own machine.
    for downloaded in glob.glob('C:/Users/USER/Downloads/ttsmaker*.mp3'):
        print(downloaded)
        shutil.copyfile(downloaded, audio_path)

    # The with-block closes the file on every pass instead of leaking one
    # open handle per round.
    with open(audio_path, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "用繁體中文回答"},
            {"role": "user", "content": transcript.text}
        ],
        # Three candidate replies are requested but only the first one is
        # printed below; the usage count presumably covers all three.
        n=3,
        temperature=1
    )

    print(completion.choices[0].message.content)

    tokens += completion.usage.total_tokens
    print('目前總花費: ', tokens * USD_PER_1K_TOKENS / 1000, '美元')
```