# 高鐵假期小助手

## 一、發想與目的
&emsp;&emsp;在旅遊旺季上高鐵假期網站查詢方案時,常常會在點入方案網頁後看到已售罄的資訊,使得操作上較煩瑣,因此想利用程式爬取各項方案資訊,包含是否已額滿、方案中的最低價、方案網頁連結。

## 二、實作架構
### 程式初始化
```python=
#@title 程式初始化
import html
import re
import urllib.parse
import urllib.request

import pandas as pd
from bs4 import BeautifulSoup as bs
```

### 輸入旅客資訊
```python=+
#@title 輸入旅客資訊
# Free-text search keyword. The default reproduces the keyword that was
# previously hard-coded (percent-encoded) in the search URL.
search = '高雄' #@param {type:"string"}
station = '台北' #@param ['南港', '台北', '板橋', '桃園', '新竹', '苗栗', '台中', '彰化', '雲林', '嘉義', '台南', '左營']
#@title Date fields
date = '2022-08-17' #@param {type:"date"}

# Station name -> numeric id used in the site's `station` query parameter.
station_dict = {'南港':1, '台北':2, '板橋':3, '桃園':4, '新竹':5, '苗栗':6, '台中':7, '彰化':8, '雲林':9, '嘉義':10, '台南':11, '左營':12}
# Index directly so an unknown station name fails here with KeyError instead
# of silently sending `station=None` to the site.
station = station_dict[station]
```

### 查詢可預訂方案
```python=+
#@title 查詢可預訂方案
BASE_URL = 'https://tholiday.thsrc.com.tw'


def getsoup(url):
    """Fetch *url* and return its parsed BeautifulSoup document.

    The HTTP response is closed as soon as parsing finishes. The parser is
    named explicitly so results do not depend on which optional parsers
    happen to be installed.
    """
    with urllib.request.urlopen(url) as page:
        return bs(page, 'html.parser')


def _with_query(href, **params):
    """Return *href* with the given query parameters set.

    Existing parameters of the same name are replaced; all others are kept.
    """
    parts = urllib.parse.urlsplit(href)
    query = dict(urllib.parse.parse_qsl(parts.query, keep_blank_values=True))
    query.update(params)
    return urllib.parse.urlunsplit(
        parts._replace(query=urllib.parse.urlencode(query))
    )


def getgrids(url):
    """Return the absolute product-page URLs listed on one result page.

    Each link is rewritten to carry the module-level ``date`` and ``station``
    selections. Returns an empty list when the page has no product list.
    """
    product_list = getsoup(url).find('div', {'class': 'products-list'})
    if product_list is None:
        return []
    cards = product_list.find_all(
        'a', {'class': 'product-card horizontal-on-tablet'}
    )
    # The query string is parsed rather than sliced by a fixed character
    # count, so the rewrite does not depend on the width of whatever
    # parameter the site puts at the end of the href.
    return [
        BASE_URL + _with_query(card['href'], date=date, station=station)
        for card in cards
    ]


def gettitles(soup):
    """Return the plan titles found in the subtitle spans of *soup*.

    Each title is cut out of the span's rendered markup: from 9 characters
    after the first newline up to the last newline.
    NOTE(review): the +9 offset presumably skips leading indentation in the
    site's markup -- confirm. This helper is not called elsewhere in this
    notebook.
    """
    spans = soup.body.find_all(
        'span', {'class': 'text-subtitle block mb-2 transitable line-clamp-2'}
    )
    titles = []
    for span in spans:
        markup = str(span)
        titles.append(markup[markup.find('\n') + 9:markup.rfind('\n')])
    return titles


def _is_sold_out(option):
    """Return True when the option's value-0 <option> text contains '額滿'."""
    return '額滿' in option.find('option', {'value': '0'}).text


def _price_value(tag):
    """Return the integer price in *tag*'s text, ignoring every non-digit."""
    return int(re.sub(r'\D', '', tag.text))


# Collect the link of every result page.
# The price range and plan-type id are fixed filters taken from the site's
# search URL; only keyword, station and date are user-selectable here.
url = (
    f'{BASE_URL}/products?price=930,5075'
    f'&search={urllib.parse.quote(search)}'
    f'&station={station}'
    f'&type=D1B98B1F-8B34-41D3-9164-44E4BCC1BBC5'
    f'&date={date}'
)
soup = getsoup(url)
pagination = soup.find('ul', {'class': 'f-pagination'})
if pagination is None:
    # No pagination bar: treat the search page itself as the only page.
    pages = [url]
else:
    # Drop the first and last anchors (presumably the previous/next arrows --
    # confirm against the site's markup).
    pages = [BASE_URL + a['href'] for a in pagination.find_all('a')[1:-1]]

# Collect every plan that still has at least one bookable option.
all_titles = []
all_links = []
all_price = []
for link in pages:
    for grid in getgrids(link):
        gsoup = getsoup(grid)
        options = gsoup.find_all(
            'div', {'class': 'px-7 rounded-lg bg-white border border-gray-light'}
        )
        # Skip the plan when every option is sold out (or there are none).
        if all(_is_sold_out(option) for option in options):
            continue
        prices = gsoup.find_all('span', {'class': 'h5 inline-block'})
        cheapest = min(prices, key=_price_value, default=None)
        all_titles.append(gsoup.find('h2', {'class': 'h4 text-black'}).text)
        all_links.append(grid)
        all_price.append(cheapest.text if cheapest is not None else '')

# Output. The DataFrame is always created so the result cell below never
# hits an undefined (or stale) `df` when everything is sold out.
df = pd.DataFrame({'方案' : all_titles, '最低價' : all_price, '連結' : all_links})
if df.empty:
    print('全部額滿,省錢請早')
```

### 查看結果
```python=+
#@title 查看結果
def make_clickable(val):
    """Render *val* as an HTML link that opens in a new tab.

    The value is HTML-escaped because it is scraped from a web page and is
    inserted into both the href attribute and the link text.
    """
    safe = html.escape(str(val), quote=True)
    return f'<a target="_blank" href="{safe}">{safe}</a>'

df.style.format({'連結': make_clickable})
```

## 三、結果呈現
### 在Google Colab上使用者可用下拉選單選擇站點及日期
![](https://i.imgur.com/wBVWqL9.png =300x300)

### 查詢結果
**版面考量,僅截錄部分結果**
![](https://i.imgur.com/2C6xOuE.png)

## 四、優化方向
* **可改為網頁版,提升使用者近用性**
* **可爬取最低價與最高價,以(最低價)~(最高價)的方式顯示**