# 2022-12-25 第七天 Python網路爬蟲應用實務班上課記錄
###### tags: `python` `爬蟲` `crawler`
## JSON格式
```python=
import json
# --- Python list -> JSON text ------------------------------------------
data = [2, 3, 4, "AA", "BB"]
json_data = json.dumps(data)  # serialise the Python structure into a JSON string
print(data, json_data, sep='\n')
print(type(data), type(json_data), sep='\n')

# --- Python dict -> JSON text ------------------------------------------
python_map = dict(aa=1, bb=2)
json_data = json.dumps(python_map)
print(python_map, json_data, sep='\n')
print(type(python_map), type(json_data), sep='\n')
print(python_map['aa'])
# json_data is only a str at this point, so json_data['aa'] would not work.
# JSON scraped from the web normally arrives as exactly this kind of string.

# --- JSON text -> Python structure ---------------------------------------
new_data = json.loads(json_data)  # parse the JSON string back into a dict
print(new_data['aa'])
# JSON payloads usually have a list (array) or a map (object) as the
# outermost value.
```
#### 合法的JSON字串
```json=
[
[1, 2, 3],
[2, 3, 4],
{
"aa": 1,
"bb": [1, 2, 3]
}
]
```
```json=
{
"aa": 1,
"id": "8908-1234",
"user": "aaron",
"is_enable": 0,
"payload": [1, 2, 3, 4, 5]
}
```
## 日期時間字串
```python=
import datetime
import locale
day = datetime.datetime.today()  # current local date and time

# strftime format codes (they are case-sensitive):
#   %Y - year    %m - month    %d - day
#   %H - hour    %M - minute   %S - second
day_str = day.strftime('%Y/%m/%d')

print(locale.getlocale())

# Switch Python to a Traditional Chinese locale. Locale names are platform
# specific and an unknown name makes setlocale() raise locale.Error, so try
# the original spelling first and then the usual POSIX spellings.
for locale_name in ('zh-hant', 'zh_TW.UTF-8', 'zh_TW'):
    try:
        locale.setlocale(locale.LC_ALL, locale_name)
    except locale.Error:
        continue  # this spelling is not available here; try the next one
    break
else:
    print('Traditional Chinese locale is not available; keeping the current locale')

# Only the ASCII date pattern goes through strftime(); the Chinese prefix is
# concatenated afterwards so the file name does not depend on whether the
# locale switch above succeeded.
day_file = '匯率-' + day.strftime('%Y%m%d-%H%M%S') + '.csv'
print(day_file)
```
## 爬取Google Trends趨勢
```python=
import requests
import datetime
import json
# Start from today and walk backwards one day at a time over seven days.
start_day = datetime.datetime.today()
for d in range(7):
    target_day = start_day - datetime.timedelta(days=d)
    day_str = target_day.strftime('%Y%m%d')
    print('日期:', day_str)
    url = 'https://trends.google.com.tw/trends/api/dailytrends?hl=zh-TW&tz=-480&ed=' + day_str + '&geo=TW&ns=15'
    # print(url)
    # Send the HTTP request; without a timeout a stalled connection would
    # block the whole loop indefinitely.
    response = requests.get(url, timeout=10)
    # Only continue with this day when the request succeeded.
    if response.status_code != 200:
        continue
    # The body carries the marker )]}', ahead of the JSON document; drop only
    # its first occurrence so the payload itself is never altered.
    json_data = response.text.replace(')]}\',', '', 1)
    trend_data = json.loads(json_data)
    trending_days = trend_data['default']['trendingSearchesDays']
    if not trending_days:
        continue  # nothing reported for this date
    for s in trending_days[0]['trendingSearches']:
        print(f" {s['title']['query']} - {s['formattedTraffic']}")
```
## 爬取台灣證券交易所股票資訊
```python=
# 股價代號,公司名稱,成交價,成交量,累積成交量,開盤價,最高價,最低價,昨日收盤價,漲跌百分比,資料時間
# c , n, z, tv, v, o, h, l, y, 自己算, tlong
import requests
import json
import time
import ftplib
# 產生網頁的方法
def html_template(html_table: str) -> str:
    """Return the complete stock-information HTML page as a string.

    The page consists of an inline stylesheet, a heading and one table whose
    header row lists the eleven stock columns. ``html_table`` is inserted
    verbatim between ``<tbody>`` and ``</tbody>``, so it must already be
    valid ``<tr>...</tr>`` markup; no escaping is applied here.

    CSS braces are doubled (``{{`` / ``}}``) because the template is an
    f-string; they render as single braces in the returned page.

    Args:
        html_table: Markup for the table body rows.

    Returns:
        The full HTML document.
    """
    return f'''
<!DOCTYPE html>
<html>
<head>
<title>我的股票資訊</title>
<meta charset="utf-8" />
<style>
*{{
box-sizing: border-box;
-webkit-box-sizing: border-box;
-moz-box-sizing: border-box;
}}
body{{
font-family: Helvetica;
-webkit-font-smoothing: antialiased;
background: rgba( 71, 147, 227, 1);
}}
h2{{
text-align: center;
font-size: 18px;
text-transform: uppercase;
letter-spacing: 1px;
color: white;
padding: 30px 0;
}}
/* Table Styles */
.table-wrapper{{
margin: 10px 70px 70px;
box-shadow: 0px 35px 50px rgba( 0, 0, 0, 0.2 );
}}
.fl-table {{
border-radius: 5px;
font-size: 12px;
font-weight: normal;
border: none;
border-collapse: collapse;
width: 100%;
max-width: 100%;
white-space: nowrap;
background-color: white;
}}
.fl-table td, .fl-table th {{
text-align: center;
padding: 8px;
}}
.fl-table td {{
border-right: 1px solid #f8f8f8;
font-size: 12px;
}}
.fl-table thead th {{
color: #ffffff;
background: #4FC3A1;
}}
.fl-table thead th:nth-child(odd) {{
color: #ffffff;
background: #324960;
}}
.fl-table tr:nth-child(even) {{
background: #F8F8F8;
}}
/* Responsive */
@media (max-width: 767px) {{
.fl-table {{
display: block;
width: 100%;
}}
.table-wrapper:before{{
content: "Scroll horizontally >";
display: block;
text-align: right;
font-size: 11px;
color: white;
padding: 0 0 10px;
}}
.fl-table thead, .fl-table tbody, .fl-table thead th {{
display: block;
}}
.fl-table thead th:last-child{{
border-bottom: none;
}}
.fl-table thead {{
float: left;
}}
.fl-table tbody {{
width: auto;
position: relative;
overflow-x: auto;
}}
.fl-table td, .fl-table th {{
padding: 20px .625em .625em .625em;
height: 60px;
vertical-align: middle;
box-sizing: border-box;
overflow-x: hidden;
overflow-y: auto;
width: 120px;
font-size: 13px;
text-overflow: ellipsis;
}}
.fl-table thead th {{
text-align: left;
border-bottom: 1px solid #f7f7f9;
}}
.fl-table tbody tr {{
display: table-cell;
}}
.fl-table tbody tr:nth-child(odd) {{
background: none;
}}
.fl-table tr:nth-child(even) {{
background: transparent;
}}
.fl-table tr td:nth-child(odd) {{
background: #F8F8F8;
border-right: 1px solid #E6E4E4;
}}
.fl-table tr td:nth-child(even) {{
border-right: 1px solid #E6E4E4;
}}
.fl-table tbody td {{
display: block;
text-align: center;
}}
}}
</style>
</head>
<body>
<h2>我的股票資訊</h2>
<div class="table-wrapper">
<table class="fl-table">
<thead>
<tr>
<!-- 股票代號,公司名稱,成交價,成交量,累積成交量,開盤價,最高價,最低價,昨日收盤價,漲跌百分比,資料時間-->
<th>股票代號</th>
<th>公司名稱</th>
<th>成交價</th>
<th>成交量</th>
<th>累積成交量</th>
<th>開盤價</th>
<th>最高價</th>
<th>最低價</th>
<th>昨日收盤價</th>
<th>漲跌百分比</th>
<th>資料時間</th>
</tr>
</thead>
<tbody>
<!-- 從python內動態產生
<tr>
<td>Content 1</td>
<td>Content 1</td>
<td>Content 1</td>
<td>Content 1</td>
<td>Content 1</td>
<td>Content 1</td>
<td>Content 1</td>
<td>Content 1</td>
<td>Content 1</td>
<td>Content 1</td>
<td>Content 1</td>
</tr> -->
{html_table}
</tbody>
</table>
</div>
</body>
</html>
'''
def timestamp2timestr(timestamp_str):
    """Format a millisecond epoch timestamp as a local-time string.

    Args:
        timestamp_str: Epoch time in milliseconds; anything ``int()`` accepts
            (the stock script passes the ``tlong`` field of each quote).

    Returns:
        The moment rendered as ``YYYY-MM-DD HH:MM:SS`` in the machine's local
        time zone. The same string is also printed.
    """
    # time.localtime() works in seconds, so scale the millisecond value down.
    seconds = int(timestamp_str) / 1000
    formatted = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(seconds))
    print(formatted)
    return formatted
stock_list_tse = ['0050', '0056', '2330', '2317', '1216']  # exchange-listed symbols (tse_ prefix)
stock_list_otc = ['6547', '6180']  # over-the-counter symbols (otc_ prefix)

# Build the ex_ch query value: "<market>_<symbol>.tw" entries separated by "|".
# Joining one combined list keeps the value free of a dangling "|" when
# either of the two lists is empty.
stock_channels = ['tse_{}.tw'.format(stock) for stock in stock_list_tse]
stock_channels += ['otc_{}.tw'.format(stock) for stock in stock_list_otc]
stock_str = '|'.join(stock_channels)
print(stock_str)

# The trailing "_" parameter used to be a hard-coded millisecond timestamp;
# send the current time instead (presumably a cache-buster - confirm).
url = ('https://mis.twse.com.tw/stock/api/getStockInfo.jsp?ex_ch=' + stock_str
       + '&json=1&delay=0&_=' + str(int(time.time() * 1000)))
print(url)

# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, timeout=10)
print(response.status_code)
if response.status_code != 200:
    raise Exception('取得股票資訊失敗')

# Collect one row per stock. Columns and the API keys they come from:
#   symbol, name, last price, volume, accumulated volume, open, high, low,
#   previous close, change percent, data time
#   c,      n,    z,          tv,     v,                  o,    h,    l,
#   y,              computed here,  tlong
all_stock_info = []
json_data = json.loads(response.text)
print(json_data['msgArray'])
for s in json_data['msgArray']:
    # Change percent is measured against the previous close (y), rounded to
    # two decimals. A non-numeric price or a zero previous close leaves it
    # undefined instead of aborting the whole run.
    # NOTE(review): the feed appears to send '-' when no trade price is
    # available - confirm against live data.
    try:
        last_price = float(s['z'])
        prev_close = float(s['y'])
        change_percent = round((last_price - prev_close) / prev_close * 100, 2)
    except (ValueError, ZeroDivisionError):
        change_percent = None
    stock_data = [
        s['c'],   # symbol
        s['n'],   # company name
        s['z'],   # last price
        s['tv'],  # volume
        s['v'],   # accumulated volume
        s['o'],   # open
        s['h'],   # high
        s['l'],   # low
        s['y'],   # previous close
        change_percent,
        timestamp2timestr(s['tlong']),  # data time
    ]
    print(stock_data)
    all_stock_info.append(stock_data)

# Render one <tr> per stock; rows are collected and joined once rather than
# growing a string with += inside the loop.
html_rows = []
for stock in all_stock_info:
    cells = list(stock)
    cells[9] = '-' if stock[9] is None else f'{stock[9]}%'
    html_rows.append(
        '\n<tr>\n' + ''.join(f'<td>{cell}</td>\n' for cell in cells) + '</tr>\n'
    )
html_table = ''.join(html_rows)
final_html = html_template(html_table)

# Write the page to index.html.
with open('index.html', 'w', encoding='utf-8') as stock_file:
    stock_file.write(final_html)

# Upload the page over FTP. The context manager ends the session even when
# the transfer raises.
# NOTE(review): the account name and password are hard-coded here; move them
# to environment variables or a config file kept out of version control, and
# rotate this password since it has been published.
with ftplib.FTP('files.000webhost.com', 'mystock2023', '39398890') as session:
    with open('index.html', 'rb') as my_file:
        session.storbinary('STOR /public_html/index.html', my_file)
```