###### tags: `web crawler`,`網路爬蟲` # web crawler 實作--查找自由時報即時新聞(結合Tkinter) **先來張成果圖(雖然版面有點陽春,但功能也還行!) 好吧!看在阿湯哥這麼帥,今天版面就決定是他了!** ![](https://i.imgur.com/0vP0RPL.jpg) ```python= import tkinter as tk from tkinter import messagebox import requests from bs4 import BeautifulSoup import time import csv import json def _hit1(): #蒐集即時新聞 global a1,a2,a3,news urL="https://news.ltn.com.tw/list/breakingnews" #自由時報電子新聞 rq=requests.get(urL).text #發出請求 soup=BeautifulSoup(rq,"html5lib") #BeautifulSoup解析網頁 for rows in soup.find_all("li"): #找到所有html裡面的li try: a1=(rows.span.text.strip()) #strip()把其餘空白丟掉 a2=(rows.a["title"].strip()) #列出新聞title a3=(rows.a["href"].strip()) #列出新聞連結 a4=("-"*30) news=[a1,a2,a3,a4]#把資訊都塞進list裡面 for i in news: listbox.insert(tk.END,[i]) #在listbox一條條列出資訊 except: continue def _hit2(): #清除功能 listbox.delete(0,tk.END) #清空List資料 def _hit3(): #儲存csv檔功能 global day #用日期記錄檔名 csvfile=open(day+"news.csv","w",newline="",encoding="utf-8-sig") writercsv=csv.writer(csvfile) urL="https://news.ltn.com.tw/list/breakingnews" rq=requests.get(urL).text soup=BeautifulSoup(rq,"html5lib") writercsv.writerow(["時間","標題","超連結"])#標題 for rows in soup.find_all("li"): try: a1=(rows.span.text.strip()) a2=(rows.a["title"].strip()) a3=(rows.a["href"].strip()) #writeR.writerow([mySoup.span.text.strip(),mySoup.a["title"].strip(),mySoup.a["href"].strip()]) writercsv.writerow([a1,a2,a3]) except: continue csvfile.close()#要記得關檔 def _hit4(): #儲存json檔功能 global day,a1,a2,a3 urL="https://news.ltn.com.tw/list/breakingnews" rq=requests.get(urL).text soup=BeautifulSoup(rq,"html5lib") news1=[] for rows in soup.find_all("li"): try: a1=(rows.span.text.strip()) a2=(rows.a["title"].strip()) a3=(rows.a["href"].strip()) news1.append([a1,a2,a3]) except: continue with open(day+"news.json","w",encoding="utf-8") as jsonFile1: json.dump(news1,jsonFile1,ensure_ascii=False,indent=4) def _hit5(): #離開功能 que=tk.messagebox.askokcancel("Hint","確定離開???") if que: win1.destroy() win1 = tk.Tk() win1.title("自由時報即時新聞!!!") win1.geometry("700x500") tt=time.localtime() day=str(tt.tm_year)+"-"+str(tt.tm_mon)+"-"+str(tt.tm_mday) #Tkinter button ,每個bt要對應一個pack() bt1 = tk.Button(win1, text="收集即時新聞",fg="Ivory",bg="OliveDrab", font=("Arial", 13), width=20, height=1, command=_hit1) bt1.pack() bt2 = tk.Button(win1, text="清除",bg="Khaki", font=("Arial", 13), width=20, height=1, command=_hit2) bt2.pack() bt3 = tk.Button(win1, text="儲存為csv檔",fg="Ivory",bg="OliveDrab", font=("Arial", 13), width=20, height=1, command=_hit3) bt3.pack() bt4 = tk.Button(win1, text="儲存為json檔",bg="Khaki", font=("Arial", 13), width=20, height=1, command=_hit4) bt4.pack() bt5 = tk.Button(win1, text="離開",fg="Ivory",bg="OliveDrab", font=("Arial", 13), width=20, height=1, command=_hit5) bt5.pack() #Tkinter 卷軸 s_bar=tk.Scrollbar(win1) s_bar.pack(side=tk.RIGHT,fill=tk.Y) listbox=tk.Listbox(win1, width=40, height=15,bg="Khaki",font=("Arial", 14),yscrollcommand=s_bar.set) listbox.pack(side=tk.BOTTOM, fill=tk.BOTH) s_bar.config(command=listbox.yview) win1.mainloop() ```