# No 2018
###### tags: `蒐集資料` `Python`
"""Download articles from ipub.exuezhe.com and store them as CSV files.

For every classification code in ``class_list``, for the year 2017 and for
each ``qh`` value from 1 to 12 (presumably the issue number -- confirm), the
script fetches the first page (24 entries) of the article list, downloads
each article's metadata and text, and writes one
``exuezhe-<class>-<year>-<qh>.csv`` file in the current directory.
"""

## SETTING ENVIRONMENT
import csv
import time

## SET CLASSIFICATION LIST
# NOTE(review): 'v6' is lowercase unlike every other code -- confirm with the
# site that this is intended.
class_list = [
    'A1', 'A2', 'A3', 'C3', 'C31', 'C4', 'C41', 'C42', 'C5', 'C8',
    'D0', 'D01', 'D1', 'D2', 'D3', 'D4', 'D421', 'D422', 'D423',
    'D424', 'D5', 'D6', 'D7', 'N1', 'v6', 'A4',
]

_BASE_URL = 'http://ipub.exuezhe.com'
_CSV_COLUMNS = ['id', 'title', 'author', 'memo', 'content']
_REQUEST_TIMEOUT = 30  # seconds; avoids hanging forever on a stalled server
_ARTICLE_DELAY = 5     # seconds to wait after each downloaded article
_ISSUE_DELAY = 10      # seconds to wait after each written CSV file


def function():
    """Do nothing; placeholder kept from the original notes."""
    pass


def _list_url(dh, year, qh):
    """Return the article-list URL for one class code, year and ``qh``."""
    return (_BASE_URL + '/Qk/GetArtList?dh=' + str(dh) + '&nf=' + str(year)
            + '&qh=' + str(qh) + '&ps=24&pn=1')


def _csv_filename(dh, year, qh):
    """Return the output file name for one class code, year and ``qh``."""
    return "exuezhe-{}-{}-{}.csv".format(dh, year, qh)


def _tag_text(node, name):
    """Return the text of the first ``name`` tag under ``node``, or ""."""
    found = node.find(name)
    return found.text if found is not None else ""


def _fetch_soup(url):
    """Download ``url`` and return its body parsed with ``html.parser``."""
    # Imported here so that the constants and pure helpers of this module can
    # be imported without the third-party scraping packages installed.
    import requests
    from bs4 import BeautifulSoup

    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    # The parser receives raw bytes, so the UTF-8 hint has to be given to
    # BeautifulSoup itself; setting ``response.encoding`` only affects
    # ``response.text``.
    return BeautifulSoup(response.content, 'html.parser',
                         from_encoding='utf-8')


def _fetch_article_ids(dh, year, qh):
    """Return the ``id`` text of every ``r1`` entry in the article list."""
    soup = _fetch_soup(_list_url(dh, year, qh))
    ids = []
    for entry in soup.find_all('r1'):
        id_tag = entry.find("id")
        if id_tag is not None:  # skip entries without an id
            ids.append(id_tag.text)
    return ids


def _fetch_article(article_id):
    """Download one article and return it as a CSV row dictionary.

    The row holds ``id``, ``title`` (tag ``til``), ``author`` (tag ``aut``),
    ``memo`` (tag ``ast``) and ``content`` (the ``ctt`` text of every ``r1``
    entry of the text response, concatenated in order).
    """
    article_id = str(article_id)
    info = _fetch_soup(_BASE_URL + "/Qw/GetBaseArt?id=" + article_id)
    text = _fetch_soup(_BASE_URL + "/Qk/GetTextArt?id=" + article_id
                       + "&pn=1&ps=1000")
    # Read ``ctt`` from each entry itself; looking it up on the whole
    # document would repeat the first paragraph once per entry.
    content = "".join(_tag_text(par, 'ctt') for par in text.find_all('r1'))
    return {
        "id": article_id,
        "title": _tag_text(info, 'til'),
        "author": _tag_text(info, 'aut'),
        "memo": _tag_text(info, 'ast'),
        "content": content,
    }


def _write_csv(filename, rows):
    """Write ``rows`` (dictionaries keyed by the CSV columns) to ``filename``."""
    # Explicit UTF-8 so non-ASCII text does not depend on the locale encoding.
    with open(filename, 'w', newline='', encoding='utf-8') as save:
        writer = csv.DictWriter(save, fieldnames=_CSV_COLUMNS)
        writer.writeheader()
        writer.writerows(rows)


def main():
    """Scrape every class code / year / ``qh`` combination into CSV files."""
    for dh in class_list:
        for year in range(2017, 2016, -1):
            for qh in range(1, 13):
                rows = []
                for article_id in _fetch_article_ids(dh, year, qh):
                    article = _fetch_article(article_id)
                    print("GET THE ARTICLE: {}".format(article['title']))
                    time.sleep(_ARTICLE_DELAY)
                    rows.append(article)

                filename = _csv_filename(dh, year, qh)
                _write_csv(filename, rows)
                time.sleep(_ISSUE_DELAY)
                print("finish {}".format(filename))


if __name__ == "__main__":
    main()