# 2022-12-25 第七天 Python網路爬蟲應用實務班上課記錄
###### tags: `python` `爬蟲` `crawler`

## JSON格式
```python=
import json

data = [2, 3, 4, "AA", "BB"]
json_data = json.dumps(data)  # 將Python資料結構轉成JSON格式(字串)
print(data)
print(json_data)
print(type(data))
print(type(json_data))

python_map = {'aa': 1, 'bb': 2}
json_data = json.dumps(python_map)
print(python_map)
print(json_data)
print(type(python_map))
print(type(json_data))
print(python_map['aa'])
# print(json_data['aa'])

# 通常從網路上爬到的JSON資料就是一段字串
new_data = json.loads(json_data)  # 將JSON轉換成Python的資料結構
print(new_data['aa'])

# JSON最外層必須是一個list或是一個map
```

#### 合法的JSON字串
```json=
[
    [1, 2, 3],
    [2, 3, 4],
    {
        "aa": 1,
        "bb": [1, 2, 3]
    }
]
```

```json=
{
    "aa": 1,
    "id": "8908-1234",
    "user": "aaron",
    "is_enable": 0,
    "payload": [1, 2, 3, 4, 5]
}
```

## 日期時間字串
```python=
import datetime
import locale

day = datetime.datetime.today()  # 取得今天日期時間
# %Y - 年
# %m - 月
# %d - 日
# %H - 時
# %M - 分
# %S - 秒
# 注意大小寫有分
day_str = datetime.datetime.strftime(day, '%Y/%m/%d')
print(locale.getlocale())
locale.setlocale(locale.LC_ALL, 'zh-hant')  # 修改Python語系為中文
day_file = datetime.datetime.strftime(day, '匯率-%Y%m%d-%H%M%S.csv')
print(day_file)
```

## 爬取Google Trends趨勢
```python=
import requests
import datetime
import json

# 今天
start_day = datetime.datetime.today()
for d in range(0, 7, 1):
    target_day = start_day - datetime.timedelta(days=d)
    day_str = datetime.datetime.strftime(target_day, '%Y%m%d')
    print('日期:', day_str)
    url = 'https://trends.google.com.tw/trends/api/dailytrends?hl=zh-TW&tz=-480&ed=' + day_str + '&geo=TW&ns=15'
    # print(url)
    response = requests.get(url)  # 發出HTTP請求
    # 請求成功再繼續往下
    if response.status_code == 200:
        json_data = response.text.replace(')]}\',', '')
        trend_data = json.loads(json_data)
        for s in trend_data['default']['trendingSearchesDays'][0]['trendingSearches']:
            print(f" {s['title']['query']} - {s['formattedTraffic']}")
```

## 爬取台灣證券交易所股票資訊
```python=
# 股價代號,公司名稱,成交價,成交量,累積成交量,開盤價,最高價,最低價,昨日收盤價,漲跌百分比,資料時間
# c , n, z, tv, v, o, h, l, y, 自己算, tlong
import \
requests import json import time import ftplib # 產生網頁的方法 def html_template(html_table): return f''' <!DOCTYPE html> <html> <head> <title>我的股票資訊</title> <meta charset="utf-8" /> <style> *{{ box-sizing: border-box; -webkit-box-sizing: border-box; -moz-box-sizing: border-box; }} body{{ font-family: Helvetica; -webkit-font-smoothing: antialiased; background: rgba( 71, 147, 227, 1); }} h2{{ text-align: center; font-size: 18px; text-transform: uppercase; letter-spacing: 1px; color: white; padding: 30px 0; }} /* Table Styles */ .table-wrapper{{ margin: 10px 70px 70px; box-shadow: 0px 35px 50px rgba( 0, 0, 0, 0.2 ); }} .fl-table {{ border-radius: 5px; font-size: 12px; font-weight: normal; border: none; border-collapse: collapse; width: 100%; max-width: 100%; white-space: nowrap; background-color: white; }} .fl-table td, .fl-table th {{ text-align: center; padding: 8px; }} .fl-table td {{ border-right: 1px solid #f8f8f8; font-size: 12px; }} .fl-table thead th {{ color: #ffffff; background: #4FC3A1; }} .fl-table thead th:nth-child(odd) {{ color: #ffffff; background: #324960; }} .fl-table tr:nth-child(even) {{ background: #F8F8F8; }} /* Responsive */ @media (max-width: 767px) {{ .fl-table {{ display: block; width: 100%; }} .table-wrapper:before{{ content: "Scroll horizontally >"; display: block; text-align: right; font-size: 11px; color: white; padding: 0 0 10px; }} .fl-table thead, .fl-table tbody, .fl-table thead th {{ display: block; }} .fl-table thead th:last-child{{ border-bottom: none; }} .fl-table thead {{ float: left; }} .fl-table tbody {{ width: auto; position: relative; overflow-x: auto; }} .fl-table td, .fl-table th {{ padding: 20px .625em .625em .625em; height: 60px; vertical-align: middle; box-sizing: border-box; overflow-x: hidden; overflow-y: auto; width: 120px; font-size: 13px; text-overflow: ellipsis; }} .fl-table thead th {{ text-align: left; border-bottom: 1px solid #f7f7f9; }} .fl-table tbody tr {{ display: table-cell; }} .fl-table tbody tr:nth-child(odd) 
{{ background: none; }} .fl-table tr:nth-child(even) {{ background: transparent; }} .fl-table tr td:nth-child(odd) {{ background: #F8F8F8; border-right: 1px solid #E6E4E4; }} .fl-table tr td:nth-child(even) {{ border-right: 1px solid #E6E4E4; }} .fl-table tbody td {{ display: block; text-align: center; }} }} </style> </head> <body> <h2>我的股票資訊</h2> <div class="table-wrapper"> <table class="fl-table"> <thead> <tr> <!-- 股票代號,公司名稱,成交價,成交量,累積成交量,開盤價,最高價,最低價,昨日收盤價,漲跌百分比,資料時間--> <th>股票代號</th> <th>公司名稱</th> <th>成交價</th> <th>成交量</th> <th>累積成交量</th> <th>開盤價</th> <th>最高價</th> <th>最低價</th> <th>昨日收盤價</th> <th>漲跌百分比</th> <th>資料時間</th> </tr> </thead> <tbody> <!-- 從python內動態產生 <tr> <td>Content 1</td> <td>Content 1</td> <td>Content 1</td> <td>Content 1</td> <td>Content 1</td> <td>Content 1</td> <td>Content 1</td> <td>Content 1</td> <td>Content 1</td> <td>Content 1</td> <td>Content 1</td> </tr> --> {html_table} <tbody> </table> </div> </body> </html> ''' def timestamp2timestr(timestamp_str): t = int(timestamp_str) # 台灣正卷交易所得timestamp為毫秒單位,需除以1000變成秒單位才能給localtime()方法使用 time_str = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(t/1000)) print(time_str) return time_str stock_list_tse = ['0050', '0056', '2330', '2317', '1216'] # 上市股票 stock_list_otc = ['6547', '6180'] # 上櫃 stock_tse_str = '|'.join(['tse_{}.tw'.format(stock) for stock in stock_list_tse]) stock_otc_str = '|'.join(['otc_{}.tw'.format(stock) for stock in stock_list_otc]) stoct_str = stock_tse_str + '|' + stock_otc_str # 將要傳遞給股票api的股票代號全部串接在一起 print(stoct_str) url = 'https://mis.twse.com.tw/stock/api/getStockInfo.jsp?ex_ch=' + stoct_str + '&json=1&delay=0&_=1671946113152' print(url) response = requests.get(url) print(response.status_code) if response.status_code != 200: raise Exception('取得股票資訊失敗') else: # 開始取得股票資訊 all_stock_info =[] json_data = json.loads(response.text) print(json_data['msgArray']) for s in json_data['msgArray']: # 股票代號,公司名稱,成交價,成交量,累積成交量,開盤價,最高價,最低價,昨日收盤價,漲跌百分比,資料時間 # c , n, z, tv, v, o, h, l, y, 自己算, 
tlong stock_data = [] # 單筆股票資訊 stock_data.append(s['c']) # 股票代號 stock_data.append(s['n']) # 公司名稱 stock_data.append(s['z']) # 成交價 stock_data.append(s['tv']) # 成交量 stock_data.append(s['v']) # 累積成交量 stock_data.append(s['o']) # 開盤價 stock_data.append(s['h']) # 最高價 stock_data.append(s['l']) # 最低價 stock_data.append(s['y']) # 昨日收盤價 stock_data.append(int(float(s['z']) / float(s['o']) * 100 * 100) / 100) # 漲跌百分比 stock_data.append(timestamp2timestr(s['tlong'])) # 資料時間 print(stock_data) all_stock_info.append(stock_data) html_table = '' for stock in all_stock_info: html_table += f''' <tr> <td>{stock[0]}</td> <td>{stock[1]}</td> <td>{stock[2]}</td> <td>{stock[3]}</td> <td>{stock[4]}</td> <td>{stock[5]}</td> <td>{stock[6]}</td> <td>{stock[7]}</td> <td>{stock[8]}</td> <td>{stock[9]}%</td> <td>{stock[10]}</td> </tr> ''' final_html = html_template(html_table) # 將股票資訊寫入index.html with open('index.html', 'w', encoding='utf-8') as stock_file: stock_file.write(final_html) # 連線FTP session = ftplib.FTP('files.000webhost.com', 'mystock2023', '39398890') with open('index.html', 'rb') as my_file: session.storbinary('STOR /public_html/index.html', my_file) session.quit() # 結束FTP連線 ```