# 專題二 - 數據一條龍 (數位時代)
### 爬蟲 - 數位時代搜尋頁面
* 將爬蟲程式模組化:在函式中帶入搜尋關鍵字,即可將搜尋頁面上搜尋到的文章標題、文章連結、文章日期,以及點入連結後取得的文章內文,輸出為 JSON 格式的文字檔。
```py=
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
from bs4 import BeautifulSoup
import requests
import json
import datetime
def _normalize_result_date(raw_text):
    """Turn the date prefix of a search-result snippet into a date string.

    Keeps the first 11 characters of ``raw_text``, removes any '.' and
    surrounding whitespace. If the result is a relative date containing
    '天前' ("N days ago"), it is converted to an absolute date formatted as
    ``%Y年%m月%d日`` relative to today; otherwise the cleaned text is
    returned unchanged.
    """
    import re

    cleaned = raw_text[:11].replace('.', '').strip()
    if '天前' in cleaned:
        # Read the whole leading number so multi-digit values such as
        # "10 天前" are not truncated to their first digit.
        leading_number = re.match(r'\d+', cleaned)
        if leading_number:
            days_ago = int(leading_number.group())
            target_day = datetime.date.today() - datetime.timedelta(days=days_ago)
            return target_day.strftime("%Y年%m月%d日")
    return cleaned


def searchDigitalTimes(searchword):
    """Crawl the bnext.com.tw (數位時代) search page for ``searchword``.

    For every search result whose title does not contain "數位時代", the
    title, URL and date are collected, then each article page is opened and
    the text of the ``<p>`` elements inside its ``div.DynamicComp`` is stored
    under ``'contents'``. The resulting list of dicts (keys ``'Date'``,
    ``'Article'``, ``'Url'``, ``'contents'``) is printed and dumped as JSON
    to ``digitaltimes.txt``.

    Args:
        searchword: Keyword inserted into the search URL.

    Returns:
        None. Results are printed and written to ``digitaltimes.txt``.
    """
    from selenium.common.exceptions import TimeoutException

    # Build the search URL from the user-supplied keyword (sorted by date).
    TARGET_URL = f'https://www.bnext.com.tw/search/kw/{searchword}#gsc.tab=0&gsc.q={searchword}&gsc.sort=date'

    # Start Chrome in headless mode through Selenium.
    options = webdriver.ChromeOptions()
    options.add_argument("headless")
    driver = webdriver.Chrome(options=options)

    articleList = []
    try:
        driver.get(TARGET_URL)
        # Actually wait for the result elements to appear before reading the
        # page source; on timeout fall through and parse whatever rendered.
        try:
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "div.gs-webResult"))
            )
        except TimeoutException:
            pass

        # Locate the list of search results with BeautifulSoup.
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        for result in soup.findAll("div", {"class": "gs-webResult"}):
            try:
                link = result.findChild("div").findChild("div").findChild("a")
                articleTitle = link.text
                articleUrl = link.get('href')
                if "數位時代" in articleTitle:
                    continue
                date_node = (
                    result.findChild("div")
                    .findNextSibling("div")
                    .findNextSibling("div")
                    .findChild("div")
                    .findNextSibling("div")
                    .findChild("div")
                    .findNextSibling("div")
                )
                date = _normalize_result_date(date_node.text)
            except AttributeError:
                # A step of the navigation returned None: this result does not
                # have the expected structure, so skip it instead of aborting.
                continue
            articleList.append({
                'Date': date,
                'Article': articleTitle,
                'Url': articleUrl
            })

        # The original code always discards the last collected result
        # (NOTE(review): reason is not visible here -- confirm it is still
        # wanted). Guarded so an empty result list no longer raises IndexError.
        if articleList:
            articleList.pop()

        # Fetch the body of every article and store it back into the list.
        # The same browser is reused instead of launching one per article.
        for article in articleList:
            driver.get(article['Url'])
            article_soup = BeautifulSoup(driver.page_source, 'html.parser')
            container = article_soup.find("div", {"class": "DynamicComp"})
            # Pages without the expected container get empty contents.
            paragraphs = container.findAll("p") if container is not None else []
            article['contents'] = "".join(p.text.strip() for p in paragraphs)
    finally:
        # Always shut the browser down, even when scraping fails midway.
        driver.quit()

    print(articleList)

    # Write the data as JSON.
    # NOTE(review): append mode means a second run adds another JSON document
    # to the same file, which is then no longer a single valid JSON value.
    with open('digitaltimes.txt', 'a', encoding='utf-8') as f:
        json.dump(articleList, f, ensure_ascii=False, indent=4)
# Script entry point: run a sample crawl when this file is executed directly.
if __name__ == "__main__":
    searchDigitalTimes('7-11')
```
#### Output

###### tags: `python` `爬蟲` `數據一條龍`