# 0901_Human_Python
###### tags: `資料科學自學園` `Python` `人大數據`
- [time=Sat, Sep 1, 2018 6:52 AM]
```python=
# coding: utf-8
import requests
import csv
import re
import time
from bs4 import BeautifulSoup

# Scrape the NSYSU news site: for each list page 1..271, collect every
# article's title / url / date / content and write one CSV per list page
# ("NsysuNews-<page>.csv").
#
# NOTE(review): the original script first issued a request whose URL was
# built by formatting a range(1, 272) object into the string — a dead,
# malformed request whose response was never used; it has been removed.
for page in range(1, 272):
    web_r = requests.get('http://news.nsysu.edu.tw/files/40-1342-2910-{}.php?Lang=zh-tw'.format(page))
    if web_r.status_code == requests.codes.ok:
        print("Get No. {} page's web resource".format(page))
        web_r.encoding = 'utf-8'  # force UTF-8 before reading .text (site is Chinese)
        web_s = BeautifulSoup(web_r.text, 'html.parser')
        # Anchors of the news listing; each <a> carries title + href.
        web_tag = web_s.select('#Dyn_2_2 .h5 a')
        print(web_tag)
        list_d = list()  # one dict per article on this list page
        for i in web_tag:
            d = {}
            title = str(i.get('title'))
            url = str(i.get('href'))
            # NOTE(review): searching inside the <a> itself for class 'date '
            # (with a trailing space) likely returns [] and stores the string
            # "[]" — verify against the page markup; kept as-is to preserve
            # the original output format.
            date = str(i.find_all(class_='date '))
            d['title'] = title
            d['url'] = url
            d['date'] = date
            print(d)
            # Fetch the article page itself.
            arti_r = requests.get(url)
            if arti_r.status_code == requests.codes.ok:
                print("Get {} 's content".format(title))
                arti_r.encoding = 'utf-8'
                arti_s = BeautifulSoup(arti_r.text, 'html.parser')
                arti_tag = arti_s.find_all(class_="ptcontent")
                # Merge the article's paragraphs into a single content string
                # (join instead of repeated += — linear, not quadratic).
                content = "".join(str(par.text) for par in arti_tag)
                d['content'] = content
                print("Catch the content")
            list_d.append(d)
            time.sleep(5)  # throttle: be polite to the server between articles
        print("Catch all the content on the No.{} .".format(page))
        filename = "NsysuNews-{}.csv".format(page)
        # encoding='utf-8' is required: the scraped Chinese text would otherwise
        # fail (or be mangled) on platforms whose default locale is not UTF-8.
        with open(filename, 'w', newline='', encoding='utf-8') as save:
            colnames = ['date', 'title', 'url', 'content']
            wt = csv.DictWriter(save, fieldnames=colnames)
            wt.writeheader()
            # writerows replaces the manual range(len(...)) index loop; rows
            # missing 'content' (failed article fetch) fall back to restval ''.
            wt.writerows(list_d)
```