# 2024-05-25 中央大學Python進階班 上課記錄
## 2024-06-01
### Pandas
#### DataFrame過濾
```python=
import pandas
data = {
'學號': [1, 2, 3, 4, 5],
'姓名': ['Aaron', 'Amber', 'Apple', 'Andy', 'Astrid'],
'國文': [100, 90, 80, 70, 60],
'英文': [90, 80, 70, 60, 50],
'數學': [80, 70, 60, 50, 40]
}
df = pandas.DataFrame(data)
# print( df[ df.國文 >= 80 ] )
# print( df[ (df.英文 > 70) & (df.數學 > 70) ] )
# print( df[ (df.國文 < 60) | (df.英文 < 60) | (df.數學 < 60) ] )
# print(df[ df.國文.isin([100, 90]) ])
# print(df[ df['姓名'].str.startswith('App') ])
# 排序
# df2 = df.sort_values('數學', ascending=True, inplace=True)
# df.loc[5] = [6, 'OK', 66, 77, 88] # 新增列資料
# df.loc[:, '歷史'] = [44, 55, 66, 77, 88, 99] # 新增欄資料
# df.loc[:, '歷史'] = [44, 55, 166, 177, 88, 99] # 修改歷史資料
# Andy英文成績改成100
# df.loc[3, '英文'] = 100
# df.drop([0, 3], inplace=True)
# df.drop('國文', axis=1, inplace=True) # axis=1代表要刪的是欄位
# print(df.loc[:, '國文':'數學'].sum())
# print(df.loc[:, '國文':'數學'].mean())
# 將所有成績只要不及的就把分數改為0
def test(val):
if val.name == '數學' or val.name == '國文' or val.name == '英文':
val = val.apply(lambda v: 0 if v < 60 else v)
return val
df = df.apply(test)
print(df)
```
#### Serias
```python=
import pandas
data = {
'學號': [1, 2, 3, 4, 5],
'姓名': ['Aaron', 'Amber', 'Apple', 'Andy', 'Astrid'],
'國文': [100, 90, 80, 70, 60],
'英文': [90, 80, 70, 60, 50],
'數學': [80, 70, 60, 50, 40]
}
df = pandas.DataFrame(data)
df.to_excel('student.xlsx', index=False)
df = pandas.read_excel('student.xlsx')
print(df['國文'])
print(df.國文)
for i in df:
print(list(df[i]))
print(df[0:2])
print(df[ ['姓名','學號'] ].head(1))
print('----')
# loc
print(df.loc[0, '國文' ])
# 取得第1 2筆資料
print(df.loc[0:2, '姓名':'英文'])
# iloc
print(df.iloc[0:3, 1:4])
# 取得最後一筆資料的國英數成績
print(df.iloc[:-2:-1, 2:])
```
### Selenium
#### 自動登入微笑單車網站
```
https://www.youbike.com.tw/region/main/login/
```
```python=
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time
# 初始化WebDriver
driver = webdriver.Chrome()
# 設定最長等待網頁載入的時間
driver.implicitly_wait(10)
# 打開網頁
driver.get('https://www.youbike.com.tw/region/main/login/')
# 點掉彈出視窗
box = driver.find_element(By.CLASS_NAME, 'f2e-cursor-pointer')
box.click()
# 輸入帳號密碼
account = driver.find_element(By.ID, 'loginAccount')
account.send_keys('096812345678')
time.sleep(1)
password = driver.find_element(By.ID, 'loginPassword')
password.send_keys('qQ12345678')
time.sleep(1)
# 登入
login = driver.find_element(By.LINK_TEXT, '登入')
login.click()
time.sleep(5)
```
安裝selenium套件
```python=
$ pip3 install selenium
```
#### 使用selenium下載台灣銀行匯率CSV檔案
```python=
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time
# 初始化WebDriver
driver = webdriver.Chrome()
# 設定最長等待網頁載入的時間
driver.implicitly_wait(10)
# 打開網頁
driver.get('https://www.google.com.tw/')
# 定位搜尋輸入框標籤
search_box = driver.find_element(By.NAME, 'q')
time.sleep(1)
search_box.send_keys('aaron')
time.sleep(2)
search_box.send_keys(Keys.ENTER)
# 爬搜尋結果
results = driver.find_elements(By.CLASS_NAME, 'yuRUbf')
for i in results:
h3 = i.find_element(By.TAG_NAME, 'h3')
print(h3.text)
time.sleep(3)
```
### Google Trend
```python=
import requests
import json
datestr = '20240531'
url = 'https://trends.google.com.tw/trends/api/dailytrends?hl=zh-TW&tz=-480&ed=' + datestr + '&geo=TW&hl=zh-TW&ns=15'
response = requests.get(url)
# print(response.text)
# 將JSON字串轉成Python物件
all_data = json.loads(response.text.replace(")]}',", ''))
for s in all_data['default']['trendingSearchesDays'][0]['trendingSearches']:
# 熱門關鍵字
print(f'{s['title']['query']}: {s['formattedTraffic']}')
```
> **備註:**
> 因為Google Trend API抓到的資料,中文字會被編碼成`\u1234`這樣的文字,如果要還原,可以透過線上工具來還原成中文字
#### Unicode Decode Online工具
https://checkserp.com/encode/unicode/
#### JSON Formatter Online工具
https://jsonformatter.org/
#### HTML網頁
```html=
<html>
<head>
<link rel="stylesheet" type="text/css" href="main.css" />
</head>
<h1 style="color: yellowgreen; font-size: 3em;" >這是我的網站</h1>
<div class="test">sdjflasdfjl</div><br />
<div id="hello">sdfjalkfskadjfl</div><br />
dsalfkjdlf<br />
<a href="https://www.google.com">前往我的部落格</a><br/>
<img width="200px" src="https://static.gltjp.com/glt/prd/data/article/21000/20205/20221009_185503_37323ab7_w1920.jpg" />
<h1>這也是標題</h1>
<h2>sdflajfdl</h2>
<h2>sdflajfdl</h2>
<h3>sdflajfdl</h3>
</html>
```
#### CSS樣式
```css=
.test {
color: blueviolet;
background-color: aqua;
}
a {
text-decoration: none;
}
#hello {
color: red;
}
h2 {
color: brown;
}
```
安裝網頁解析模組
```
$ pip install bs4
```
安裝網頁下載模組
```
$ pip install requests
```
```python=
import bs4
import requests
import time
import csv
# 下載台灣銀行匯率網頁
response = requests.get('https://rate.bot.com.tw/xrt?Lang=zh-TW')
# 解析網頁並存到soup變數
soup = bs4.BeautifulSoup(response.text, 'html.parser')
# print(soup.title)
# print(soup.html.body.a)
# divs = soup.find_all(['a', 'option'], value='zh-TW')
# divs = soup.find_all(['option', 'a'], value='zh-TW')
# divs = soup.find_all(['a', 'option'], class_='lang_ddl_css')
# query = {'aria-button': '查詢'}
# divs = soup.find_all(attrs=query)
# for i in divs:
# print(i)
# 找到tbody標籤
tbody = soup.find('tbody')
# 找到tbody內每一個tr標籤
trs = tbody.find_all('tr')
all_rates = []
for i in trs:
rate = []
# 全部的匯率名稱
print(i.td.div.find_all('div')[1].text.strip(), end='')
rate.append(i.td.div.find_all('div')[1].text.strip())
print(': ', end='')
# 查現金買入
td1 = i.find(attrs={'data-table': '本行現金買入'})
rate.append(td1.text.strip())
print(td1.text, end='')
print(', ', end='')
# 查現金賣出
td2 = i.find(attrs={'data-table': '本行現金賣出'})
rate.append(td2.text.strip())
print(td2.text.strip(), end='')
print(', ', end='')
# 查即期買入
td3 = i.find(attrs={'data-table': '本行即期買入'})
rate.append(td3.text.strip())
print(td3.text.strip(), end='')
print(', ', end='')
# 查即期賣出
td4 = i.find(attrs={'data-table': '本行即期賣出'})
rate.append(td4.text.strip())
print(td4.text.strip())
all_rates.append(rate)
file_name = time.strftime('%Y%m%d_%H%M%S.csv')
with open(file_name, 'w', encoding='utf-8', newline='') as csvfile:
writer = csv.writer(csvfile)
writer.writerow(['幣別', '現金買入', '現金賣出', '即期買入', '即期賣出'])
writer.writerows(all_rates)
```
> `.text`: 取得標籤內容,且不包含子標籤
> `.string`: 取得標籤內容,如果內容內有子標籤,會回傳None
## 2024-05-25
#### yield
```python=
# def foo1():
# print('foo1')
# a = 3
# if a == 3:
# return 99
# print('b')
# return 8
# a = foo1()
# print(a)
# yield目的是為了節省記憶體
def foo2():
print('foo2')
yield 1 # 程式碼會停在這裡
print('foo3')
yield 2
a = foo2() # 函式內如果有yeild指令,該函式呼叫後會回傳generator產生器,而不是直接執行函式
try:
result = next(a)
print(result)
result = next(a)
print(result)
result = next(a)
print(result)
except StopIteration:
print('沒有資料了')
```
#### 用yield連續輸入數字
```python=
# data = []
# while True:
# user = input('請輸入一個數字(quit=離開): ')
# if user == 'quit':
# break
# else:
# data.append(int(user))
# total = 0
# for i in data:
# total += i
# print(total)
import tool
total = 0
for user in tool.user_input_number():
total += user
print(total)
```
###### tool.py
```python=
def user_input_number():
while True:
user = input('請輸入一個數字(quit=離開): ')
if user == 'quit':
break
else:
yield int(user)
```
#### 產生器的send
```python=
# 寫一個迭代器可以產生三個1~100隨機整數,並且加總
import random
def gen_random():
a = random.randint(1, 100)
print('第一個隨機數:', a)
a1 = yield a
print('al:', a1)
b = random.randint(1, 100)
print('第二個隨機數:', b)
b1 = yield b + a1
print('bl:', b1)
c = random.randint(1, 100)
print('第3個隨機數:', c)
yield c + b1
a = gen_random() # 拿到產生器
first_no = next(a)
# second_no = next(a)
second_no = a.send(first_no)
# third_no = next(a)
third_no = a.send(second_no)
print('總和:', third_no)
```
#### 再次解說
```python=
def foo(a, b):
print('a')
print('b')
t = yield '第一次'
print(t)
print('c')
print('d')
print('e')
print('f')
r = yield '第2次'
print(r)
gen = foo(1, 2) # 拿到產生器
r1 = next(gen)
print(r1)
# r2 = next(gen)
r2 = gen.send('Hello')
print(r2)
r3 = gen.send('test')
```
#### 物件導向
```python=
# 變數 = 屬性
class Car: # Car類別
def run(self):
print('車子啟動')
def show(self):
print(f'我是{self.color}車子')
c1 = Car() # 建立Car物件
c1.color = '紅色'
c1.show()
c2 = Car() # 建立Car物件
c2.color = '黃色'
c2.show()
c3 = Car() # 建立Car物件
c3.color = '黑色'
c3.show()
```
#### 特殊方法
```python=
# 變數 = 屬性
class Car: # Car類別
def run(self):
print('車子啟動')
def show(self):
print(f'我是{self.color}車子, 汽油: {self.gas}公升')
# 建構式
def __init__(self, gas):
self.gas = gas
self.color = '無色'
def __str__(self):
return f'不要print我'
def __add__(self, car):
return f'{self.color}跟{car.color}結合'
c1 = Car(20) # 建立Car物件
c1.color = '紅色'
c1.show()
c2 = Car(12) # 建立Car物件
# c2.color = '黃色'
c2.show()
c3 = Car(8) # 建立Car物件
c3.color = '黑色'
c3.show()
print(c1)
print(c2)
print(c1 + c2)
class SportCar(Car): # SportCar類別繼承自Car類別
def turbo_mode():
print('開啟turbo模式')
def __init__(self, gas):
super().__init__(gas)
def show(self):
print(f'我是{self.color}跑車, 汽油: {self.gas}公升')
super().show() # super() 代表呼叫父類別
sc = SportCar(33)
sc.show()
```
#### 多型
```python=
class Shape:
def draw(self):
print('我是形狀的父類別')
class ShapeNo1(Shape):
def draw(self):
print('*')
class ShapeNo2(Shape):
def draw(self):
print('**')
class ShapeNo3(Shape):
def draw(self):
print('***')
class ShapeNo4(Shape):
def draw(self):
print('****')
class ShapeNo5(Shape):
def draw(self):
print('*****')
canvas = [ShapeNo1(), ShapeNo2(), ShapeNo3(), ShapeNo4(), ShapeNo5(), ShapeNo4(), ShapeNo3(), ShapeNo2(), ShapeNo1()]
for i in canvas:
i.draw()
```
#### AOP
```python=
def b(func):
def c():
print('before')
func()
print('after')
return c
@b
def a():
print('I\'m a')
a()
```
#### OpenAI Chat
```python=
from openai import OpenAI
client = OpenAI(api_key='xxxxx')
completion = client.chat.completions.create(
model="gpt-3.5-turbo",
messages=[
{"role": "system", "content": "請用可愛的語氣回答我"},
{"role": "user", "content": "你幾歲"}
]
)
print(completion.choices[0].message.to_dict()['content'])
```
#### ChatBot
```python=
from openai import OpenAI
client = OpenAI(api_key='xxxxx')
messages=[
{"role": "system", "content": "請用可愛的語氣回答我"},
]
while True:
user = input('=> ')
messages.append({'role': 'user', 'content': user})
completion = client.chat.completions.create(
model="gpt-3.5-turbo",
messages=messages
)
print(completion.choices[0].message.to_dict()['content'])
```
#### emoji
```python=
from openai import OpenAI
client = OpenAI(api_key='xxxxxxx')
messages=[
{"role": "system", "content": "You will be provided with text, and your task is to translate it into emojis. Do not use any regular text. Do your best with emojis only."},
]
while True:
user = input('=> ')
messages.append({'role': 'user', 'content': user})
completion = client.chat.completions.create(
model="gpt-3.5-turbo",
messages=messages
)
print(completion.choices[0].message.to_dict()['content'])
```
#### 語音轉文字
```python=
from openai import OpenAI
client = OpenAI(api_key='xxxxxxx')
audio_file = open("ttsmaker-file-2024-5-25-13-51-45.mp3", "rb")
transcript = client.audio.transcriptions.create(
model="whisper-1",
file=audio_file
)
print(transcript.text)
```
#### 文字轉語音
```python=
from openai import OpenAI
client = OpenAI(api_key='xxxxxxx')
speech_file_path = "speech.mp3"
response = client.audio.speech.create(
model="tts-1",
voice="nova",
input='Python很難'
)
response.write_to_file(speech_file_path)
```
#### 語音聊天
```python=
from openai import OpenAI
client = OpenAI(api_key='xxx')
# 語音轉文字
audio_file = open("ttsmaker-file-2024-5-25-14-51-21.mp3", "rb")
transcript = client.audio.transcriptions.create(
model="whisper-1",
file=audio_file
)
print(transcript.text)
# ChatBot
completion = client.chat.completions.create(
model="gpt-3.5-turbo",
messages=[
{"role": "system", "content": "請用可愛的語氣回答我"},
{"role": "user", "content": transcript.text},
]
)
print(completion.choices[0].message.content)
# 文字轉語音
speech_file_path = "speech.mp3"
response = client.audio.speech.create(
model="tts-1",
voice="nova",
input=completion.choices[0].message.content
)
response.write_to_file(speech_file_path)
```
#### 文生圖
```python=
from openai import OpenAI
client = OpenAI(api_key='xxxxxxx')
response = client.images.generate(
model="dall-e-3",
prompt="中央大學的夜景,上空有極光",
n=1,
size="1024x1024"
)
print(response.data[0].url)
# 將AI產生的圖片下載到本地端
import requests
from datetime import datetime
img_data = requests.get(response.data[0].url).content
with open(f'ai_image-{datetime.now().strftime("%Y%m%d%H%M%S")}.png', 'wb') as handler:
handler.write(img_data)
```
#### Fine-tune job訓練資料
```
{"messages": [{"role": "system", "content": "中央大學"},{"role": "user", "content": "誰最棒"},{"role": "assistant", "content": "Aaron"}]}
{"messages": [{"role": "system", "content": "中央大學"},{"role": "user", "content": "誰最慘"},{"role": "assistant", "content": "Aaron"}]}
{"messages": [{"role": "system", "content": "中央大學"},{"role": "user", "content": "誰最好"},{"role": "assistant", "content": "Aaron"}]}
{"messages": [{"role": "system", "content": "中央大學"},{"role": "user", "content": "誰最矮"},{"role": "assistant", "content": "Aaron"}]}
{"messages": [{"role": "system", "content": "中央大學"},{"role": "user", "content": "誰最高"},{"role": "assistant", "content": "Aaron"}]}
{"messages": [{"role": "system", "content": "中央大學"},{"role": "user", "content": "誰最窮"},{"role": "assistant", "content": "Aaron"}]}
{"messages": [{"role": "system", "content": "中央大學"},{"role": "user", "content": "誰最有錢"},{"role": "assistant", "content": "Aaron"}]}
{"messages": [{"role": "system", "content": "中央大學"},{"role": "user", "content": "誰最厲害"},{"role": "assistant", "content": "Aaron"}]}
{"messages": [{"role": "system", "content": "中央大學"},{"role": "user", "content": "誰最聰明"},{"role": "assistant", "content": "Aaron"}]}
{"messages": [{"role": "system", "content": "中央大學"},{"role": "user", "content": "誰最酷"},{"role": "assistant", "content": "Aaron"}]}
```
#### Upload
```python=
# 上傳訓練檔案
response = client.files.create(
file=open("mydata.jsonl", "rb"),
purpose="fine-tune"
)
print(response.id)
```
#### Create fine-tuning job
```python=
# 建立訓練任務
client.fine_tuning.jobs.create(
training_file=response.id, # file id
model="gpt-3.5-turbo"
)
```
#### 使用自己訓練的model
```python=
messages=[
{"role": "system", "content": "中央大學"},
]
while True:
user = input('=> ')
messages.append({'role': 'user', 'content': user})
completion = client.chat.completions.create(
model="ft:gpt-3.5-turbo-0125:personal::9Sh1QlV6",
messages=messages
)
print(completion.choices[0].message.to_dict()['content'])
```
#### model
```
ft:gpt-3.5-turbo-0125:personal::9ShMlhoF
2024/5/25 下午4:41
ft:gpt-3.5-turbo-0125:personal::9ShEusgH
2024/5/25 下午4:33
ft:gpt-3.5-turbo-0125:personal::9Sh1QlV6
2024/5/25 下午4:19
```