# 網路爬蟲

## 小論文爬取

```py=
!pip install selenium
!pip install webdriver_manager

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup

# Selenium 4 removed the executable_path keyword; pass a Service instead.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))  # change to your own driver if needed

def crawl(when, category, keywords, win):
    """Search the shs.edu.tw mini-thesis (小論文) archive and print each hit.

    when     -- dropdown index of the contest round to search
    category -- visible text of the subject category (e.g. "物理類")
    keywords -- title keywords typed into the search box
    win      -- visible text of the award level (e.g. "優等")

    Prints "title: href" for every result link and returns None.
    """
    global driver
    driver.get("https://www.shs.edu.tw/")
    # Selenium 4: find_element_by_* helpers were removed; use By locators.
    driver.find_element(By.NAME, "user_idcode").click()
    driver.find_element(By.XPATH, "/html/body/div[1]/div[2]/div[1]/ul/li[4]/a").click()
    Select(driver.find_element(By.NAME, "s_cath")).select_by_visible_text(u"小論文")
    Select(driver.find_element(By.NAME, "s_essay_cat")).select_by_visible_text(category)  # subject category
    Select(driver.find_element(By.NAME, "s_contest_number")).select_by_index(when)  # which contest round
    Select(driver.find_element(By.ID, "s_record")).select_by_visible_text(win)
    driver.find_element(By.NAME, "s_title").send_keys(keywords)  # title keywords
    driver.find_element(By.ID, "search_button").click()
    soup = BeautifulSoup(driver.page_source, "html.parser")
    for link in soup.find_all("a", target="search_view"):
        print(link.text, link["href"], sep=": ")
    # driver.close()

# Best effort over contest rounds: stop at the first index the dropdown
# does not have, instead of silently swallowing every error (bare except).
for i in range(15):
    try:
        crawl(i, "物理類", "擺", "優等")
    except Exception as exc:
        print(f"stopped at round {i}: {exc}")
        break
```

## 小說爬取

```
import urllib.request as req
import bs4

def f(url, i):
    """Download the chapter at *url*, save it to a file named 無職轉生{i},
    and return the absolute URL of the next chapter ("next" link).
    """
    request = req.Request(url, headers={
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36"
    })
    with req.urlopen(request) as response:
        data = response.read().decode("utf-8")
    root = bs4.BeautifulSoup(data, "html.parser")
    chapter = root.find("div", class_="chapter-content-wrapper")
    # The with-block closes the file itself; the original's extra fp.close() was redundant.
    with open(f"無職轉生{i}", "w", encoding="utf8") as fp:
        fp.write(chapter.div.text)
    nxt = root.find("li", class_="next")
    return "https://www.shikoto.com" + nxt.a["href"]
url = "https://www.shikoto.com/articles/147173/147175.html"  # first chapter to fetch

# Each call to f() saves one chapter and returns the URL of the next one,
# so the loop walks chapters 1 through 9 in sequence.
for i in range(1, 10):
    url = f(url, i)

# Fixed user-facing typo: "deck top" -> "desktop" (missing space after comma too).
print("Get successful, check your desktop")

# beautifulsoup4
```