2024-05-25 中央大學Python進階班上課記錄

# 2024-05-25 中央大學Python進階班上課記錄 ## 2024-06-01 ### Pandas #### DataFrame過濾 ```python= import pandas data = { '學號': [1, 2, 3, 4, 5], '姓名': ['Aaron', 'Amber', 'Apple', 'Andy', 'Astrid'], '國文': [100, 90, 80, 70, 60], '英文': [90, 80, 70, 60, 50], '數學': [80, 70, 60, 50, 40] } df = pandas.DataFrame(data) # print( df[ df.國文 >= 80 ] ) # print( df[ (df.英文 > 70) & (df.數學 > 70) ] ) # print( df[ (df.國文 < 60) | (df.英文 < 60) | (df.數學 < 60) ] ) # print(df[ df.國文.isin([100, 90]) ]) # print(df[ df['姓名'].str.startswith('App') ]) # 排序 # df2 = df.sort_values('數學', ascending=True, inplace=True) # df.loc[5] = [6, 'OK', 66, 77, 88] # 新增列資料 # df.loc[:, '歷史'] = [44, 55, 66, 77, 88, 99] # 新增欄資料 # df.loc[:, '歷史'] = [44, 55, 166, 177, 88, 99] # 修改歷史資料 # Andy英文成績改成100 # df.loc[3, '英文'] = 100 # df.drop([0, 3], inplace=True) # df.drop('國文', axis=1, inplace=True) # axis=1代表要刪的是欄位 # print(df.loc[:, '國文':'數學'].sum()) # print(df.loc[:, '國文':'數學'].mean()) # 將所有成績只要不及格的就把分數改為0 def test(val): if val.name == '數學' or val.name == '國文' or val.name == '英文': val = val.apply(lambda v: 0 if v < 60 else v) return val df = df.apply(test) print(df) ``` #### Series ```python= import pandas data = { '學號': [1, 2, 3, 4, 5], '姓名': ['Aaron', 'Amber', 'Apple', 'Andy', 'Astrid'], '國文': [100, 90, 80, 70, 60], '英文': [90, 80, 70, 60, 50], '數學': [80, 70, 60, 50, 40] } df = pandas.DataFrame(data) df.to_excel('student.xlsx', index=False) df = pandas.read_excel('student.xlsx') print(df['國文']) print(df.國文) for i in df: print(list(df[i])) print(df[0:2]) print(df[ ['姓名','學號'] ].head(1)) print('----') # loc print(df.loc[0, '國文' ]) # 取得第1~3筆資料（loc的切片包含結尾標籤） print(df.loc[0:2, '姓名':'英文']) # iloc print(df.iloc[0:3, 1:4]) # 取得最後一筆資料的國英數成績 print(df.iloc[:-2:-1, 2:]) ``` ### Selenium #### 自動登入微笑單車網站 ``` https://www.youbike.com.tw/region/main/login/ ``` ```python= from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.common.keys import Keys import time # 初始化WebDriver driver = webdriver.Chrome() # 設定最長等待網頁載入的時間 
driver.implicitly_wait(10) # 打開網頁 driver.get('https://www.youbike.com.tw/region/main/login/') # 點掉彈出視窗 box = driver.find_element(By.CLASS_NAME, 'f2e-cursor-pointer') box.click() # 輸入帳號密碼 account = driver.find_element(By.ID, 'loginAccount') account.send_keys('096812345678') time.sleep(1) password = driver.find_element(By.ID, 'loginPassword') password.send_keys('qQ12345678') time.sleep(1) # 登入 login = driver.find_element(By.LINK_TEXT, '登入') login.click() time.sleep(5) ``` 安裝selenium套件 ```python= $ pip3 install selenium ``` #### 使用selenium自動搜尋Google並列出搜尋結果標題 ```python= from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.common.keys import Keys import time # 初始化WebDriver driver = webdriver.Chrome() # 設定最長等待網頁載入的時間 driver.implicitly_wait(10) # 打開網頁 driver.get('https://www.google.com.tw/') # 定位搜尋輸入框標籤 search_box = driver.find_element(By.NAME, 'q') time.sleep(1) search_box.send_keys('aaron') time.sleep(2) search_box.send_keys(Keys.ENTER) # 爬搜尋結果 results = driver.find_elements(By.CLASS_NAME, 'yuRUbf') for i in results: h3 = i.find_element(By.TAG_NAME, 'h3') print(h3.text) time.sleep(3) ``` ### Google Trend ```python= import requests import json datestr = '20240531' url = 'https://trends.google.com.tw/trends/api/dailytrends?hl=zh-TW&tz=-480&ed=' + datestr + '&geo=TW&hl=zh-TW&ns=15' response = requests.get(url) # print(response.text) # 將JSON字串轉成Python物件 all_data = json.loads(response.text.replace(")]}',", '')) for s in all_data['default']['trendingSearchesDays'][0]['trendingSearches']: # 熱門關鍵字 print(f'{s['title']['query']}: {s['formattedTraffic']}') ``` > **備註:** > 因為Google Trend API抓到的資料，中文字會被編碼成`\u1234`這樣的文字，如果要還原，可以透過線上工具來還原成中文字 #### Unicode Decode Online工具 https://checkserp.com/encode/unicode/ #### JSON Formatter Online工具 https://jsonformatter.org/ #### HTML網頁 ```html= <html> <head> <link rel="stylesheet" type="text/css" href="main.css" /> </head> <h1 style="color: yellowgreen; font-size: 3em;" >這是我的網站</h1> <div 
class="test">sdjflasdfjl</div><br /> <div id="hello">sdfjalkfskadjfl</div><br /> dsalfkjdlf<br /> <a href="https://www.google.com">前往我的部落格</a><br/> <img width="200px" src="https://static.gltjp.com/glt/prd/data/article/21000/20205/20221009_185503_37323ab7_w1920.jpg" /> <h1>這也是標題</h1> <h2>sdflajfdl</h2> <h2>sdflajfdl</h2> <h3>sdflajfdl</h3> </html> ``` #### CSS樣式 ```css= .test { color: blueviolet; background-color: aqua; } a { text-decoration: none; } #hello { color: red; } h2 { color: brown; } ``` 安裝網頁解析模組 ``` $ pip install bs4 ``` 安裝網頁下載模組 ``` $ pip install requests ``` ```python= import bs4 import requests import time import csv # 下載台灣銀行匯率網頁 response = requests.get('https://rate.bot.com.tw/xrt?Lang=zh-TW') # 解析網頁並存到soup變數 soup = bs4.BeautifulSoup(response.text, 'html.parser') # print(soup.title) # print(soup.html.body.a) # divs = soup.find_all(['a', 'option'], value='zh-TW') # divs = soup.find_all(['option', 'a'], value='zh-TW') # divs = soup.find_all(['a', 'option'], class_='lang_ddl_css') # query = {'aria-button': '查詢'} # divs = soup.find_all(attrs=query) # for i in divs: # print(i) # 找到tbody標籤 tbody = soup.find('tbody') # 找到tbody內每一個tr標籤 trs = tbody.find_all('tr') all_rates = [] for i in trs: rate = [] # 全部的匯率名稱 print(i.td.div.find_all('div')[1].text.strip(), end='') rate.append(i.td.div.find_all('div')[1].text.strip()) print(': ', end='') # 查現金買入 td1 = i.find(attrs={'data-table': '本行現金買入'}) rate.append(td1.text.strip()) print(td1.text, end='') print(', ', end='') # 查現金賣出 td2 = i.find(attrs={'data-table': '本行現金賣出'}) rate.append(td2.text.strip()) print(td2.text.strip(), end='') print(', ', end='') # 查即期買入 td3 = i.find(attrs={'data-table': '本行即期買入'}) rate.append(td3.text.strip()) print(td3.text.strip(), end='') print(', ', end='') # 查即期賣出 td4 = i.find(attrs={'data-table': '本行即期賣出'}) rate.append(td4.text.strip()) print(td4.text.strip()) all_rates.append(rate) file_name = time.strftime('%Y%m%d_%H%M%S.csv') with open(file_name, 'w', encoding='utf-8', newline='') as 
csvfile: writer = csv.writer(csvfile) writer.writerow(['幣別', '現金買入', '現金賣出', '即期買入', '即期賣出']) writer.writerows(all_rates) ``` > `.text`: 取得標籤內的所有文字（包含子標籤內的文字），但不含標籤本身 > `.string`: 取得標籤內容，如果內容內有子標籤，會回傳None ## 2024-05-25 #### yield ```python= # def foo1(): # print('foo1') # a = 3 # if a == 3: # return 99 # print('b') # return 8 # a = foo1() # print(a) # yield目的是為了節省記憶體 def foo2(): print('foo2') yield 1 # 程式碼會停在這裡 print('foo3') yield 2 a = foo2() # 函式內如果有yield指令，該函式呼叫後會回傳generator產生器，而不是直接執行函式 try: result = next(a) print(result) result = next(a) print(result) result = next(a) print(result) except StopIteration: print('沒有資料了') ``` #### 用yield連續輸入數字 ```python= # data = [] # while True: # user = input('請輸入一個數字(quit=離開): ') # if user == 'quit': # break # else: # data.append(int(user)) # total = 0 # for i in data: # total += i # print(total) import tool total = 0 for user in tool.user_input_number(): total += user print(total) ``` ###### tool.py ```python= def user_input_number(): while True: user = input('請輸入一個數字(quit=離開): ') if user == 'quit': break else: yield int(user) ``` #### 產生器的send ```python= # 寫一個迭代器可以產生三個1~100隨機整數，並且加總 import random def gen_random(): a = random.randint(1, 100) print('第一個隨機數:', a) a1 = yield a print('al:', a1) b = random.randint(1, 100) print('第二個隨機數:', b) b1 = yield b + a1 print('bl:', b1) c = random.randint(1, 100) print('第3個隨機數:', c) yield c + b1 a = gen_random() # 拿到產生器 first_no = next(a) # second_no = next(a) second_no = a.send(first_no) # third_no = next(a) third_no = a.send(second_no) print('總和:', third_no) ``` #### 再次解說 ```python= def foo(a, b): print('a') print('b') t = yield '第一次' print(t) print('c') print('d') print('e') print('f') r = yield '第2次' print(r) gen = foo(1, 2) # 拿到產生器 r1 = next(gen) print(r1) # r2 = next(gen) r2 = gen.send('Hello') print(r2) r3 = gen.send('test') ``` #### 物件導向 ```python= # 變數 = 屬性 class Car: # Car類別 def run(self): print('車子啟動') def show(self): print(f'我是{self.color}車子') c1 = Car() # 建立Car物件 c1.color = '紅色' c1.show() c2 = 
Car() # 建立Car物件 c2.color = '黃色' c2.show() c3 = Car() # 建立Car物件 c3.color = '黑色' c3.show() ``` #### 特殊方法 ```python= # 變數 = 屬性 class Car: # Car類別 def run(self): print('車子啟動') def show(self): print(f'我是{self.color}車子, 汽油: {self.gas}公升') # 建構式 def __init__(self, gas): self.gas = gas self.color = '無色' def __str__(self): return f'不要print我' def __add__(self, car): return f'{self.color}跟{car.color}結合' c1 = Car(20) # 建立Car物件 c1.color = '紅色' c1.show() c2 = Car(12) # 建立Car物件 # c2.color = '黃色' c2.show() c3 = Car(8) # 建立Car物件 c3.color = '黑色' c3.show() print(c1) print(c2) print(c1 + c2) class SportCar(Car): # SportCar類別繼承自Car類別 def turbo_mode(): print('開啟turbo模式') def __init__(self, gas): super().__init__(gas) def show(self): print(f'我是{self.color}跑車, 汽油: {self.gas}公升') super().show() # super() 代表呼叫父類別 sc = SportCar(33) sc.show() ``` #### 多型 ```python= class Shape: def draw(self): print('我是形狀的父類別') class ShapeNo1(Shape): def draw(self): print('*') class ShapeNo2(Shape): def draw(self): print('**') class ShapeNo3(Shape): def draw(self): print('***') class ShapeNo4(Shape): def draw(self): print('****') class ShapeNo5(Shape): def draw(self): print('*****') canvas = [ShapeNo1(), ShapeNo2(), ShapeNo3(), ShapeNo4(), ShapeNo5(), ShapeNo4(), ShapeNo3(), ShapeNo2(), ShapeNo1()] for i in canvas: i.draw() ``` #### AOP ```python= def b(func): def c(): print('before') func() print('after') return c @b def a(): print('I\'m a') a() ``` #### OpenAI Chat ```python= from openai import OpenAI client = OpenAI(api_key='xxxxx') completion = client.chat.completions.create( model="gpt-3.5-turbo", messages=[ {"role": "system", "content": "請用可愛的語氣回答我"}, {"role": "user", "content": "你幾歲"} ] ) print(completion.choices[0].message.to_dict()['content']) ``` #### ChatBot ```python= from openai import OpenAI client = OpenAI(api_key='xxxxx') messages=[ {"role": "system", "content": "請用可愛的語氣回答我"}, ] while True: user = input('=> ') messages.append({'role': 'user', 'content': user}) completion = 
client.chat.completions.create( model="gpt-3.5-turbo", messages=messages ) print(completion.choices[0].message.to_dict()['content']) ``` #### emoji ```python= from openai import OpenAI client = OpenAI(api_key='xxxxxxx') messages=[ {"role": "system", "content": "You will be provided with text, and your task is to translate it into emojis. Do not use any regular text. Do your best with emojis only."}, ] while True: user = input('=> ') messages.append({'role': 'user', 'content': user}) completion = client.chat.completions.create( model="gpt-3.5-turbo", messages=messages ) print(completion.choices[0].message.to_dict()['content']) ``` #### 語音轉文字 ```python= from openai import OpenAI client = OpenAI(api_key='xxxxxxx') audio_file = open("ttsmaker-file-2024-5-25-13-51-45.mp3", "rb") transcript = client.audio.transcriptions.create( model="whisper-1", file=audio_file ) print(transcript.text) ``` #### 文字轉語音 ```python= from openai import OpenAI client = OpenAI(api_key='xxxxxxx') speech_file_path = "speech.mp3" response = client.audio.speech.create( model="tts-1", voice="nova", input='Python很難' ) response.write_to_file(speech_file_path) ``` #### 語音聊天 ```python= from openai import OpenAI client = OpenAI(api_key='xxx') # 語音轉文字 audio_file = open("ttsmaker-file-2024-5-25-14-51-21.mp3", "rb") transcript = client.audio.transcriptions.create( model="whisper-1", file=audio_file ) print(transcript.text) # ChatBot completion = client.chat.completions.create( model="gpt-3.5-turbo", messages=[ {"role": "system", "content": "請用可愛的語氣回答我"}, {"role": "user", "content": transcript.text}, ] ) print(completion.choices[0].message.content) # 文字轉語音 speech_file_path = "speech.mp3" response = client.audio.speech.create( model="tts-1", voice="nova", input=completion.choices[0].message.content ) response.write_to_file(speech_file_path) ``` #### 文生圖 ```python= from openai import OpenAI client = OpenAI(api_key='xxxxxxx') response = client.images.generate( model="dall-e-3", prompt="中央大學的夜景，上空有極光", n=1, 
size="1024x1024" ) print(response.data[0].url) # 將AI產生的圖片下載到本地端 import requests from datetime import datetime img_data = requests.get(response.data[0].url).content with open(f'ai_image-{datetime.now().strftime("%Y%m%d%H%M%S")}.png', 'wb') as handler: handler.write(img_data) ``` #### Fine-tune job訓練資料 ``` {"messages": [{"role": "system", "content": "中央大學"},{"role": "user", "content": "誰最棒"},{"role": "assistant", "content": "Aaron"}]} {"messages": [{"role": "system", "content": "中央大學"},{"role": "user", "content": "誰最慘"},{"role": "assistant", "content": "Aaron"}]} {"messages": [{"role": "system", "content": "中央大學"},{"role": "user", "content": "誰最好"},{"role": "assistant", "content": "Aaron"}]} {"messages": [{"role": "system", "content": "中央大學"},{"role": "user", "content": "誰最矮"},{"role": "assistant", "content": "Aaron"}]} {"messages": [{"role": "system", "content": "中央大學"},{"role": "user", "content": "誰最高"},{"role": "assistant", "content": "Aaron"}]} {"messages": [{"role": "system", "content": "中央大學"},{"role": "user", "content": "誰最窮"},{"role": "assistant", "content": "Aaron"}]} {"messages": [{"role": "system", "content": "中央大學"},{"role": "user", "content": "誰最有錢"},{"role": "assistant", "content": "Aaron"}]} {"messages": [{"role": "system", "content": "中央大學"},{"role": "user", "content": "誰最厲害"},{"role": "assistant", "content": "Aaron"}]} {"messages": [{"role": "system", "content": "中央大學"},{"role": "user", "content": "誰最聰明"},{"role": "assistant", "content": "Aaron"}]} {"messages": [{"role": "system", "content": "中央大學"},{"role": "user", "content": "誰最酷"},{"role": "assistant", "content": "Aaron"}]} ``` #### Upload ```python= # 上傳訓練檔案 response = client.files.create( file=open("mydata.jsonl", "rb"), purpose="fine-tune" ) print(response.id) ``` #### Create fine-tuning job ```python= # 建立訓練任務 client.fine_tuning.jobs.create( training_file=response.id, # file id model="gpt-3.5-turbo" ) ``` #### 使用自己訓練的model ```python= messages=[ {"role": "system", "content": "中央大學"}, ] while 
True: user = input('=> ') messages.append({'role': 'user', 'content': user}) completion = client.chat.completions.create( model="ft:gpt-3.5-turbo-0125:personal::9Sh1QlV6", messages=messages ) print(completion.choices[0].message.to_dict()['content']) ``` #### model ``` ft:gpt-3.5-turbo-0125:personal::9ShMlhoF 2024/5/25 下午4:41 ft:gpt-3.5-turbo-0125:personal::9ShEusgH 2024/5/25 下午4:33 ft:gpt-3.5-turbo-0125:personal::9Sh1QlV6 2024/5/25 下午4:19 ```