Vincent55 楊竣鴻
telegram/ 專案頂層目錄
├── __init__.py ├── 初始化
├── commands/ ├── 指令相關模組
│ ├── __init__.py │
│ ├── options.py │
│ ├── context.py │
│ └── core.py │
├── utils.py │
└── __main__.py └── 專案進入點
...
>$ python -m telegram
import utils
from commands import core
from commands import *
/usr/lib/python3.10/
/usr/lib/python3.10/
from commands.core import Core
Python 軟體包索引 (PyPI) 是 Python 程式設計語言的軟體存儲庫 [https://pypi.org/]
pip install <package-name> 安裝套件
pip uninstall <package-name> 移除套件
pip list 查看當前安裝的套件
pip show PythonTurtle 測試是否安裝成功
pip install selenium
sitconcamp@SITCONCamp2023:~$ pip install selenium
...
Installing collected packages: sortedcontainers, sniffio, PySocks, h11, exceptiongroup, certifi, attrs, async-generator, wsproto, outcome, trio, trio-websocket, selenium
Successfully installed PySocks-1.7.1 async-generator-1.10 attrs-23.1.0 certifi-2023.5.7 exceptiongroup-1.1.1 h11-0.14.0 outcome-1.2.0 selenium-4.9.1 sniffio-1.3.0 sortedcontainers-2.4.0 trio-0.22.0 trio-websocket-0.10.2 wsproto-1.2.0
# 包含原本的 Selenium,共裝了 13 個套件
sitconcamp@SITCONCamp2023:~$ pip list | wc -l
88
sitconcamp@SITCONCamp2023:~$ pip uninstall selenium
...
Would remove:
/home/sitconcamp/.local/lib/python3.10/site-packages/selenium-4.9.1.dist-info/*
/home/sitconcamp/.local/lib/python3.10/site-packages/selenium/*
...
Successfully uninstalled selenium-4.9.1
sitconcamp@SITCONCamp2023:~$ pip list | wc -l
87
poetry init
poetry env use python
curl -sSL https://install.python-poetry.org | python3 -
echo 'export PATH="~/.local/bin:$PATH"' >> ~/.bashrc
poetry --version
poetry init
poetry env use python3
poetry add
or poetry install
poetry shell
Poetry could not find a pyproject.toml file in /home/sitconcamp or its parents
poetry add beautifulsoup4
poetry install
from bs4 import BeautifulSoup
poetry add beautifulsoup4
poetry shell
code .
即可在當前目錄開啟 VSCode
poetry env info
查看虛擬環境路徑,並將其複製
<head> <link rel="stylesheet" type="text/css" href="mycss.css"> <style> body{ background:#fff; color:#777; } h1 { font-weight:bold; font-style:italic; font-family:sans-serif; color:green; } a { color: #0077cc; text-decoration: none; } .container { border: 2px solid #ccc; padding: 20px; border-radius: 10px; } </style> </head>
https://jupiter.challenges.picoctf.org/problem/9670/
Hint: 使用開發人員工具,檢查 HTML CSS JavaScript
Requests is a simple, yet elegant, HTTP library.
pip install requests
poetry add requests
import requests

# Keep the URL we want to GET in a variable first.
target = 'https://example.com/'

# Issue the GET request; `requests.get` returns a Response object.
response = requests.get(target)
print(type(response), response)
# Output: <class 'requests.models.Response'> <Response [200]>
import requests

# Keep the URL we want to GET in a variable first.
target = 'https://example.com/'

# Issue the GET request and print the response body as text.
response = requests.get(target)
print(response.text)
''' <!doctype html> <html> <head> <title>Example Domain</title> <meta charset="utf-8" /> <meta http-equiv="Content-type" content="text/html; charset=utf-8" /> <meta name="viewport" content="width=device-width, initial-scale=1" /> <style type="text/css"> body { background-color: #f0f0f2; margin: 0; padding: 0; font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; } div { width: 600px; margin: 5em auto; padding: 2em; background-color: #fdfdff; border-radius: 0.5em; box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02); } a:link, a:visited { color: #38488f; text-decoration: none; } @media (max-width: 700px) { div { margin: 0 auto; width: auto; } } </style> </head> <body> <div> <h1>Example Domain</h1> <p>This domain is for use in illustrative examples in documents. You may use this domain in literature without prior coordination or asking for permission.</p> <p><a href="https://www.iana.org/domains/example">More information...</a></p> </div> </body> </html> '''
https://sitcon.camp/2023/
https://sitcon.camp/2023/
re.findall(pattern, text)
response.json()
xxx.json
.json 檔案
with open('result.json', 'w', encoding='utf-8') as f:
    json.dump(res, f, indent=2, sort_keys=True, ensure_ascii=False)
.json 檔案取得 JSON
with open('result.json', 'r') as f:
    result_file = json.load(f)
print(result_file)
import requests
import json

# URL that returns a JSON body.
target = 'https://httpbin.org/json'

# GET the URL and decode the JSON response straight into a dict.
data = requests.get(target).json()
print(type(data), data)
print(data['slideshow']['slides'][1]['title'])

# Persist the decoded payload to result.json.
with open('result.json', 'w', encoding='utf-8') as f:
    json.dump(data, f, indent=2, sort_keys=True, ensure_ascii=False)
''' <class 'dict'> {'slideshow': {'author': 'Yours Truly', 'date': 'date of publication', 'slides': [{'title': 'Wake up to WonderWidgets!', 'type': 'all'}, {'items': ['Why <em>WonderWidgets</em> are great', 'Who <em>buys</em> WonderWidgets'], 'title': 'Overview', 'type': 'all'}], 'title': 'Sample Slide Show'}} Overview '''
https://api.thecatapi.com/v1/images/search
result.json
裡面result.json
不會被覆蓋
poetry add beautifulsoup4
from bs4 import BeautifulSoup

# Raw HTML string to feed to the parser.
html_text = """ <html><head></head><body><h1>Hello, World!</h1></body></html> """

# Parse with the built-in html.parser backend and print an indented dump.
document = BeautifulSoup(html_text, "html.parser")
print(document.prettify())
<html>
<head>
</head>
<body>
<h1>
Hello, World!
</h1>
</body>
</html>
soup.find('p', id='myid', class_='myclass')
from bs4 import BeautifulSoup
import requests

# Fetch the page and hand the HTML source (resp.text) to BeautifulSoup as
# the first argument; "html.parser" selects the parser backend.
# NOTE(review): `url` is not defined in this snippet — it must be set on an
# earlier slide; confirm before running standalone.
soup = BeautifulSoup(requests.get(url).text, "html.parser")

# Page <title> text.
print(soup.title.getText())
# Text of the first <p> tag found.
print(soup.find('p').getText())
# First tag whose class is "magic".
print(soup.find(class_="magic"))
# href attribute of the first <a> tag.
print(soup.find('a')['href'])
# Text of every tag whose class is "article".
articles = soup.find_all(class_="article")
for article in articles:
    print(article.getText())
from bs4 import BeautifulSoup
import requests

# PTT Baseball board index page.
url = "https://www.ptt.cc/bbs/Baseball/index.html"

# Download the page and parse it.
page = requests.get(url)
document = BeautifulSoup(page.text, "html.parser")

# Each post entry on the index is a tag with class "r-ent"; print them all.
for entry in document.find_all(class_="r-ent"):
    print(entry)
from bs4 import BeautifulSoup
import requests
import json

# PTT Baseball board index page to scrape.
url = "https://www.ptt.cc/bbs/Baseball/index.html"
r = requests.get(url)
soup = BeautifulSoup(r.text, "html.parser")

# Collect {title, author, link} for each post entry (class "r-ent").
results = []
for article in soup.find_all(class_="r-ent"):
    title_tag = article.find(class_="title")
    # Deleted posts have a title div with no <a> tag, so title_tag.a is None
    # and subscripting it with ['href'] would raise TypeError — skip those.
    if title_tag is None or title_tag.a is None:
        continue
    results.append({
        "title": title_tag.text.strip(),
        "author": article.find(class_="author").text.strip(),
        "link": title_tag.a['href'],
    })

# Save the scraped entries to result.json.
with open('result.json', 'w', encoding='utf-8') as f:
    json.dump(results, f, indent=2, sort_keys=True, ensure_ascii=False)
https://www.ptt.cc/bbs/Baseball/index.html
Hint: 可將目前的爬蟲寫成一個 function,每次爬取到上一頁連結後就呼叫自己
pip install <wheel file>.whl
packaging_tutorial/
├── LICENSE
├── pyproject.toml
├── README.md
├── src/
│ └── example_package_YOUR_USERNAME_HERE/
│ ├── __init__.py
│ └── example.py
└── tests/
Node.js® is a JavaScript runtime built on Chrome's V8 JavaScript engine.
V8 engine: https://chromium.googlesource.com/v8/v8