# 網路爬蟲
小論文爬取
```py=
!pip install selenium
!pip install webdriver_manager
from selenium import webdriver
from selenium.webdriver.common.keys import Keys as k
from selenium.webdriver.support.ui import Select
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
driver = webdriver.Chrome(executable_path=ChromeDriverManager().install()) #改成自己的
def crawl(when, category, keywords, win):
    """Search shs.edu.tw for prize-winning mini-theses and print title/link pairs.

    Args:
        when: index of the contest round in the round drop-down.
        category: visible text of the subject category (e.g. "物理類").
        keywords: keyword(s) typed into the title search box.
        win: visible text of the award level (e.g. "優等").

    Side effects:
        Drives the module-level `driver` browser session and prints one
        "title: href" line per search result.
    """
    # Local import keeps this edit self-contained; Selenium 4 removed the
    # find_element_by_* helpers in favour of find_element(By.*, ...).
    from selenium.webdriver.common.by import By

    global driver
    driver.get("https://www.shs.edu.tw/")
    driver.find_element(By.NAME, "user_idcode").click()
    # Open the search page via its menu entry (brittle absolute XPath —
    # breaks if the site layout changes).
    driver.find_element(By.XPATH, "/html/body/div[1]/div[2]/div[1]/ul/li[4]/a").click()

    # Fill in the search form.
    Select(driver.find_element(By.NAME, "s_cath")).select_by_visible_text(u"小論文")
    Select(driver.find_element(By.NAME, "s_essay_cat")).select_by_visible_text(category)  # subject category
    Select(driver.find_element(By.NAME, "s_contest_number")).select_by_index(when)  # contest round
    Select(driver.find_element(By.ID, "s_record")).select_by_visible_text(win)  # award level
    driver.find_element(By.NAME, "s_title").send_keys(keywords)  # title keywords
    driver.find_element(By.ID, "search_button").click()

    # Parse the rendered result page and print every result link.
    soup = BeautifulSoup(driver.page_source, "html.parser")
    for link in soup.find_all("a", target="search_view"):
        print(link.text, link["href"], sep=": ")
    # driver.close()  # kept disabled so the session survives repeated calls
# Crawl every contest round; the loop stops at the first round index that
# does not exist (select_by_index raises past the last drop-down option).
try:
    for round_index in range(15):
        crawl(round_index, "物理類", "擺", "優等")
except Exception as exc:
    # The original bare `except: pass` hid every failure (including typos
    # and site changes); at least report why the loop stopped.
    print(f"stopped after an error: {exc!r}")
```
小說爬取
```py
import urllib.request as req
import bs4
def f(url, i):
    """Download one novel chapter, save its text locally, return the next chapter's URL.

    Args:
        url: absolute URL of the chapter page to fetch.
        i: chapter number, used only in the output file name.

    Returns:
        Absolute URL of the following chapter, taken from the page's
        "next" link.
    """
    # Spoof a desktop-browser User-Agent so the site serves the normal page.
    request = req.Request(url, headers={
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36"
    })
    with req.urlopen(request) as response:
        data = response.read().decode("utf-8")

    root = bs4.BeautifulSoup(data, "html.parser")

    # The chapter body sits inside this wrapper div.
    chapter = root.find("div", class_="chapter-content-wrapper")
    # `with` already closes the file — the original's explicit fp.close()
    # inside the block was redundant and has been dropped.
    with open(f"無職轉生{i}", "w", encoding="utf8") as fp:
        fp.write(chapter.div.text)

    # The "next" list item carries a site-relative link to the next chapter.
    next_item = root.find("li", class_="next")
    return "https://www.shikoto.com" + next_item.a["href"]
# Start at the first chapter and follow "next" links, saving chapters 1-9
# to the current working directory.
url = "https://www.shikoto.com/articles/147173/147175.html"
for i in range(1, 10):
    url = f(url, i)
# Fixed garbled completion message ("deck top" -> "desktop").
print("Get successful, check your desktop")
#beautifulsoup4
```