# Face Recognition: A First Experience
OpenCV is an image-processing library whose native implementation is in C/C++.
## PIL: reading and rotating an image
```python=
from PIL import Image
import numpy as np

with Image.open(r"../職訓所照片/07-xxx.jpg") as im:
    # im.rotate(45).show()
    print(type(im))
    print(dir(im))
    img = np.array(im)
    print(type(img))
    # print(img)
    print(img.shape)
```
<!-- > Inspection output
```python=
# methods and attributes, with dunder (magic) methods filtered out
<class 'http.client.HTTPResponse'>
[
"begin",
"chunk_left",
"chunked",
"close",
"closed",
"code",
"debuglevel",
"detach",
"fileno",
"flush",
"fp",
"getcode",
"getheader",
"getheaders",
"geturl",
"headers",
"info",
"isatty",
"isclosed",
"length",
"msg",
"peek",
"read",
"read1",
"readable",
"readinto",
"readinto1",
"readline",
"readlines",
"reason",
"seek",
"seekable",
"status",
"tell",
"truncate",
"url",
"version",
"will_close",
"writable",
"write",
"writelines",
]
<class 'PIL.JpegImagePlugin.JpegImageFile'>
[
"alpha_composite",
"app",
"applist",
"apply_transparency",
"bits",
"close",
"convert",
"copy",
"crop",
"custom_mimetype",
"decoderconfig",
"decodermaxblock",
"draft",
"effect_spread",
"entropy",
"filename",
"filter",
"format",
"format_description",
"fp",
"frombytes",
"get_child_images",
"get_format_mimetype",
"getbands",
"getbbox",
"getchannel",
"getcolors",
"getdata",
"getexif",
"getextrema",
"getim",
"getpalette",
"getpixel",
"getprojection",
"getxmp",
"has_transparency_data",
"height",
"histogram",
"huffman_ac",
"huffman_dc",
"icclist",
"im",
"info",
"layer",
"layers",
"load",
"load_djpeg",
"load_end",
"load_prepare",
"load_read",
"map",
"mode",
"palette",
"paste",
"point",
"putalpha",
"putdata",
"putpalette",
"putpixel",
"pyaccess",
"quantization",
"quantize",
"readonly",
"reduce",
"remap_palette",
"resize",
"rotate",
"save",
"seek",
"show",
"size",
"split",
"tell",
"thumbnail",
"tile",
"tobitmap",
"tobytes",
"toqimage",
"toqpixmap",
"transform",
"transpose",
"verify",
"width",
]
``` -->
## NumPy: cropping an image
This detour through PIL lets us work with Chinese text (file paths and fonts):
open ---> numpy ndarray ---> slice ---> fromarray ---> show
Image object ---> ndarray object ---> Image object (then use the Image object's methods)
https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.fromarray
```python=
from PIL import Image
import numpy as np

with Image.open(r"../職訓所照片/07-xxx.jpg") as im:
    img = np.array(im)
    # Crop the face region with NumPy slicing:
    # 300:1700 is the row range (Y axis), 700:2000 is the column range (X axis)
    face_region = img[300:1700, 700:2000, :]
    # Convert the cropped array back to a PIL image to display or save it
    face_image = Image.fromarray(face_region)
    print(type(face_image))
    face_image.show()  # show the cropped image
    # face_image.save('face_cropped.jpg')  # optionally save the cropped image
```
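As a quick check of the slicing semantics used above (rows first, then columns, with the stop index excluded), here is a tiny self-contained sketch on a synthetic array:
```python
import numpy as np

# A 4x6 "image" whose values make the crop easy to verify by eye
img = np.arange(24).reshape(4, 6)
print(img)
# Rows 1..2 (Y axis) and columns 2..4 (X axis); stop indices are excluded
crop = img[1:3, 2:5]
print(crop.shape)  # (2, 3)
print(crop)        # [[ 8  9 10], [14 15 16]]
```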
## PIL: image compositing and drawing text
Official Pillow references:
https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.convert
https://pillow.readthedocs.io/en/stable/handbook/concepts.html#concept-modes
https://pillow.readthedocs.io/en/stable/reference/ImageDraw.html#example-draw-partial-opacity-text
## Pillow modes, translated from the official docs
1. **1 (1-bit pixels, black and white)**:
   - One bit per pixel; a non-color image whose pixel values are 0 or 1 (black is 0, white is 1).
2. **L (8-bit pixels, grayscale)**:
   - 8 bits per pixel, ranging from 0 (black) to 255 (white). This is the grayscale mode.
3. **RGB (3x8-bit pixels, true color)**:
   - Each pixel is made of red, green, and blue channels, 8 bits each. This is the standard true-color mode.
4. **RGBA (4x8-bit pixels, true color + alpha)**:
   - Like `RGB` but with an extra alpha channel defining each pixel's transparency: 0 is fully transparent, 255 fully opaque.

These modes define how each pixel is structured and stored; choose the mode that fits your processing and display needs.
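A minimal sketch of converting between these modes with `Image.convert` (the path reuses the sample photo from the sections above; substitute any image you have):
```python
from PIL import Image

with Image.open(r"../職訓所照片/07-xxx.jpg") as im:
    print(im.mode)             # e.g. "RGB" for a JPEG
    gray = im.convert("L")     # 8-bit grayscale
    bw = gray.convert("1")     # 1-bit black and white (dithered)
    rgba = im.convert("RGBA")  # add an alpha channel (fully opaque by default)
    for img in (gray, bw, rgba):
        print(img.mode, img.size)
```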
Further reading / the official example, adapted:
```python=
# https://pillow.readthedocs.io/en/stable/reference/ImageDraw.html#example-draw-partial-opacity-text
from PIL import Image, ImageDraw, ImageFont

# get an image
with Image.open("../職訓所照片/07-xxx.jpg").convert("RGBA") as base:
    # make a blank image for the text, initialized to transparent text color
    txt = Image.new("RGBA", base.size, (255, 255, 255, 0))
    # get a font
    fnt = ImageFont.truetype(
        r"/Users/larry/Library/CloudStorage/OneDrive-個人/新竹職訓所/補充講義/NotoSansTC-Medium.ttf", 80)
    # get a drawing context
    d = ImageDraw.Draw(txt)
    # draw text, half opacity
    d.text((1200, 1300), "原始照片", font=fnt, fill=(255, 255, 255, 128))
    # draw text, full opacity
    d.text((1200, 1600), "01-王齡移.jpg", font=fnt, fill=(255, 255, 255, 255))
    out = Image.alpha_composite(base, txt)
    out.show()
```
Another approach: read the image file as raw bytes with NumPy, then decode it with cv2:
https://vocus.cc/article/664fec0afd8978000149dcd6
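A minimal sketch of that approach (it mirrors the `cv2_imread` helper in the full script further below; the point is that `np.fromfile` handles non-ASCII paths that a bare `cv2.imread` may fail on):
```python
import numpy as np
import cv2

# Read the raw bytes with NumPy (non-ASCII-path safe), then let cv2 decode them
buf = np.fromfile(r"../職訓所照片/07-xxx.jpg", dtype=np.uint8)
img = cv2.imdecode(buf, cv2.IMREAD_UNCHANGED)
print(img.shape)  # BGR ndarray, e.g. (height, width, 3)
```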
The instructor's approach:
read the file into array form with Pillow, then add text on top.
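A minimal sketch of that idea, assuming the same sample photo as above and a local copy of the Noto Sans TC font (the font path here is an assumption; point it at wherever your `.ttf` lives):
```python
import numpy as np
from PIL import Image, ImageDraw, ImageFont

# Read with Pillow, hop through a NumPy array, then draw text on the PIL side
with Image.open(r"../職訓所照片/07-xxx.jpg") as im:
    arr = np.array(im)                  # ndarray form (e.g. for cropping)
    img = Image.fromarray(arr)          # back to a PIL image
    font = ImageFont.truetype(r"./NotoSansTC-Medium.ttf", 80)  # assumed font path
    draw = ImageDraw.Draw(img)
    draw.text((50, 50), "原始照片", font=font, fill=(255, 0, 0))
    img.show()
```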
# Learning resources
## OpenCV
https://steam.oxxostudio.tw/category/python/ai/opencv.html
## urllib.request
https://docs.python.org/zh-tw/3/library/urllib.request.html#module-urllib.request
## Python Pillow package
https://pillow.readthedocs.io/en/stable/reference/Image.html
Formatter/beautifier for tidying code:
https://codebeautify.org/python-formatter-beautifier#
Complete face-recognition code:
In class this was walked through as a worked example of how to apply the code and adjust its parameters; the notes below were written up with the help of the **小助理** AI assistant.
```python
import dlib
import cv2

# Open a camera (index 2 here; 0 is usually the built-in camera)
cap = cv2.VideoCapture(2)
# cap = cv2.VideoCapture('index00.mp4')
# Lower the default frame size; the default can be large and costly to process
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1000)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 1000)
# Get dlib's default (HOG-based) frontal face detector
detector = dlib.get_frontal_face_detector()
# Load the 68-landmark shape predictor used to locate facial feature points
predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')
# While the camera is open, run detection on every frame
while True:
    # cap.read() returns (success flag, one frame as an ndarray)
    _, frame = cap.read()
    # frame = cv2.flip(frame, 0)
    img_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    face_rects = detector(img_gray, 0)
    # Walk over the detection results
    for d in face_rects:
        x1 = d.left()
        y1 = d.top()
        x2 = d.right()
        y2 = d.bottom()
        # Draw the detected face rectangle
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2, cv2.LINE_AA)
        # Locate the landmark positions
        shape = predictor(img_gray, d)
        # Draw the 68 landmarks
        for i in range(68):
            cv2.circle(frame, (shape.part(i).x, shape.part(i).y), 2, (128, 128, 128), 2)
    # Show the annotated frame
    cv2.namedWindow('Face Detection', 0)
    cv2.imshow("Face Detection", frame)
    # Exit when ESC (key code 27) is pressed
    if cv2.waitKey(10) == 27:
        break
# Release the capture
cap.release()
# Close all windows
cv2.destroyAllWindows()
```
```python
import os
import numpy as np
import cv2   # image processing: OpenCV
import dlib  # face recognition: dlib

path = '/Users/larry/Library/CloudStorage/OneDrive-個人/新竹職訓所/Python/Python310/職訓所照片/'
# Destination folder for the cropped photos
target_path = './裁剪後照片/'
# Create the folder if it does not already exist
if not os.path.exists(target_path):
    os.mkdir(target_path)

name_list = []
for root, dirs, files in os.walk(path):
    for file in files:
        name_list.append(os.path.join(root, file))
# print(name_list)
print(len(name_list))

# dlib: HOG-based frontal face detector
detector = dlib.get_frontal_face_detector()
# 68-landmark shape predictor (based on an Ensemble of Regression Trees)
predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')

# Custom image reader: decode via NumPy so non-ASCII paths work
def cv2_imread(filePath):
    cv_img = cv2.imdecode(np.fromfile(filePath, dtype=np.uint8), cv2.IMREAD_UNCHANGED)
    return cv_img

for name in name_list:
    # Skip non-image files such as .DS_Store before trying to load them
    if not name.lower().endswith(('.png', '.jpg', '.jpeg')):
        print(f"Skipping non-image file: {name}")
        continue
    img = cv2_imread(name)
    # Check that the image was actually read
    if img is None:
        print(f"Failed to load image: {name}")
        continue
    # Convert to grayscale for detection
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Find where the face is (rectangle: top-left / bottom-right corners)
    rects = detector(img_gray, 1)
    if len(rects) == 0:
        print(f"No face found in: {name}")
        continue
    # print(dir(rects[0]))  # <class '_dlib_pybind11.rectangle'>
    # 'area', 'bl_corner', 'bottom', 'br_corner', 'center', 'contains', 'dcenter', 'height',
    # 'intersect', 'is_empty', 'left', 'right', 'tl_corner', 'top', 'tr_corner', 'width'
    # Pad the crop region (e.g. by 20 pixels)
    padding = 20
    # Expand the crop while clamping to the image boundaries
    top = max(0, rects[0].top() - padding)
    bottom = min(img.shape[0], rects[0].bottom() + padding)
    left = max(0, rects[0].left() - padding)
    right = min(img.shape[1], rects[0].right() + padding)
    img1 = img[top:bottom, left:right]
    # Build the output file name (without the source path)
    filename = os.path.basename(name)
    filename_without_extension = os.path.splitext(filename)[0] + '1.jpg'
    # Join the target path correctly
    path2 = os.path.join(target_path, filename_without_extension)
    print(path2)
    # Encode to JPEG and write via tofile (again, non-ASCII-path safe)
    cv2.imencode('.jpg', img1)[1].tofile(path2)

# Slicing refresher:
# a = "ABCDEFG"
# print(a[1:5:2])  # -> "BD" (indices 1 and 3)
# a[start : stop : step]; the stop index is excluded
```
Face recognition on video:
```python
# Face recognition on video
import pickle
import cv2
import dlib
import numpy
from PIL import Image, ImageDraw, ImageFont

font_file = r"./NotoSansTC-Bold.ttf"
_font = ImageFont.truetype(font_file, 12)  # PIL font for the name labels

def print_array_details(a):
    print('Dimensions: %d, shape: %s, dtype: %s' % (a.ndim, a.shape, a.dtype))

pickle_file1 = './裁剪後照片/staff_descriptors.pickle'
pickle_file2 = './裁剪後照片/staff_candidate.pickle'
with open(pickle_file1, 'rb') as f1:
    descriptors = pickle.load(f1)  # 30 reference face descriptors, each a numpy array
with open(pickle_file2, 'rb') as f2:
    candidate = pickle.load(f2)    # candidate names

predictor_path = "shape_predictor_68_face_landmarks.dat"  # 68-landmark model
recogmodel = "dlib_face_recognition_resnet_model_v1.dat"  # face recognition model
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(predictor_path)
facerec = dlib.face_recognition_model_v1(recogmodel)      # load the recognition model

file = "./index00_smallv.mp4"
cap = cv2.VideoCapture(file)                     # open the video (or a camera index)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))   # frame width
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) # frame height
# fourcc = cv2.VideoWriter_fourcc(*'XVID')
# out = cv2.VideoWriter('output.avi', fourcc, 30.0, (width, height))  # empty output video, fps=30
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(f'{file}_output.mp4', fourcc, 30.0, (width, height))

if not cap.isOpened():
    print("Cannot open camera")
    exit()

while True:
    ret, frame = cap.read()
    if not ret:
        print("Cannot receive frame")
        break
    # Wrap the frame as a PIL image (note: frame is BGR, so PIL colors are swapped)
    imgObj = Image.fromarray(frame)
    # Optionally skip frames that are too blurry:
    # if cv2.Laplacian(frame, cv2.CV_64F).var() < 100:
    #     print("Skipping blurred frame")
    #     continue
    rects = detector(frame, 1)  # detect faces in this frame
    dist = []
    dict_face_dist = {}
    for n, d in enumerate(rects):
        shape = predictor(frame, d)                              # landmark detection
        feature = facerec.compute_face_descriptor(frame, shape)  # 128-D feature vector
        d_test = numpy.array(feature)                            # convert to numpy
        # Euclidean distance against every reference descriptor
        for item in descriptors:
            dist_ = numpy.linalg.norm(item - d_test)
            dist.append(dist_)
        dict_face_dist[n] = [(d.left(), d.top(), d.right(), d.bottom()), dist]
        dist = []
    draw = ImageDraw.Draw(imgObj)
    for n, key in enumerate(dict_face_dist):
        # Pair the candidate names with their Euclidean distances in a dict
        c_d = dict(zip(candidate, dict_face_dist[key][1]))
        # Sort by distance, smallest first
        cd_sorted = sorted(c_d.items(), key=lambda d: d[1])
        # The smallest distance gives the recognized name
        # rec_name = cd_sorted[0][0] + str(n)
        rec_name = cd_sorted[0][0] + str(round(cd_sorted[0][1], 2))
        print(f'{str(key) + cd_sorted[0][0]:10s} {round(cd_sorted[0][1], 2)}')
        if round(cd_sorted[0][1], 2) <= 0.5:
            left = dict_face_dist[key][0][0]
            top = dict_face_dist[key][0][1]
            right = dict_face_dist[key][0][2]
            bottom = dict_face_dist[key][0][3]
            draw.rectangle(((left, top), (right, bottom)), outline='blue')
            # txt_w, txt_h = draw.textsize(rec_name, font=_font)
            draw.rectangle(((left, bottom), (right, bottom + 20 + 10)), fill='blue', outline='blue')
            draw.text((left + 8, bottom + 5), rec_name, fill='white', font=_font)
    img = numpy.array(imgObj)
    out.write(img)  # append the annotated frame to the output video
    cv2.imshow('image processing', img)
    if cv2.waitKey(int(1000 / 30)) == ord('q'):
        break  # press q to stop
cap.release()
out.release()  # release resources
cv2.destroyAllWindows()
```
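One step this note never shows is how the two pickle files get built. A minimal enrollment sketch, assuming one face per cropped photo in `./裁剪後照片/`, the same dlib model files, and that each file name (minus extension) serves as the candidate name (all assumptions, chosen to match the loader above):
```python
import os
import pickle
import numpy as np
import cv2
import dlib

detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')
facerec = dlib.face_recognition_model_v1('dlib_face_recognition_resnet_model_v1.dat')

descriptors, candidate = [], []
folder = './裁剪後照片/'
for fname in sorted(os.listdir(folder)):
    if not fname.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue
    # Non-ASCII-path-safe read, as in the cropping script
    img = cv2.imdecode(np.fromfile(os.path.join(folder, fname), dtype=np.uint8),
                       cv2.IMREAD_UNCHANGED)
    rects = detector(img, 1)
    if len(rects) == 0:
        continue
    shape = predictor(img, rects[0])
    # 128-D descriptor for this reference face
    descriptors.append(np.array(facerec.compute_face_descriptor(img, shape)))
    candidate.append(os.path.splitext(fname)[0])

with open(folder + 'staff_descriptors.pickle', 'wb') as f:
    pickle.dump(descriptors, f)
with open(folder + 'staff_candidate.pickle', 'wb') as f:
    pickle.dump(candidate, f)
```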
Parameter-tuning notes from asking the AI:
To modify `faces = face_cascade.detectMultiScale(img, scaleFactor=1.03, minNeighbors=9, minSize=(55,55), maxSize=(80,80))`, you can adjust the following parameters to influence the face-detection results:
### Tunable parameters:
1. **`scaleFactor`**:
   - Determines how much the image is shrunk at each scale step. Larger values speed up detection but may miss some faces; smaller values search more finely at the cost of longer detection time.
   - Examples: `scaleFactor=1.05` or `scaleFactor=1.1` to speed up detection, or lower it to `scaleFactor=1.01` for higher precision.
2. **`minNeighbors`**:
   - Determines how many neighboring detections a candidate region needs before it is kept. Higher values give more precise results but may miss some faces; lower values detect more faces but also more false positives.
   - Examples: lower it to `minNeighbors=5` to relax the filtering, or raise it to `minNeighbors=10` for higher accuracy.
3. **`minSize`** and **`maxSize`**:
   - Set the minimum and maximum size of the detection box, which controls the range of face sizes detected. For high-resolution images you can raise these values to avoid detecting faces that are too small.
   - Examples:
     - `minSize=(30,30)`, `maxSize=(100,100)`: for detecting smaller faces.
     - `minSize=(100,100)`, `maxSize=(300,300)`: for larger faces in high-resolution images.
### Concrete examples:
1. Detect faces over a wider size range and speed up detection:
```python
faces = face_cascade.detectMultiScale(img, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30), maxSize=(200, 200))
```
2. Increase detection precision and focus on smaller faces:
```python
faces = face_cascade.detectMultiScale(img, scaleFactor=1.02, minNeighbors=10, minSize=(20, 20), maxSize=(60, 60))
```
### Rules of thumb when experimenting:
- The closer **`scaleFactor`** is to 1, the more precise but slower the detection; larger values are faster but may miss some faces.
- The larger **`minNeighbors`**, the more reliable each detected face; smaller values yield more detections but also more false positives.
- **`minSize`** and **`maxSize`** can be set according to the image resolution, which prevents detecting irrelevant regions that are too small or too large.
With these, you can tune the parameters to your specific situation and get the best face-detection results.
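For reference, `face_cascade` itself is never defined in this note. A minimal, self-contained sketch of the Haar-cascade setup these snippets assume (the input file name is a placeholder; `face_cropped.jpg` is the optional save from the cropping section):
```python
import cv2

# Load OpenCV's bundled frontal-face Haar cascade
face_cascade = cv2.CascadeClassifier(
    cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

img = cv2.imread('face_cropped.jpg')  # assumed sample image
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# detectMultiScale runs on grayscale input; tune the parameters as described above
faces = face_cascade.detectMultiScale(gray, scaleFactor=1.03, minNeighbors=9,
                                      minSize=(55, 55), maxSize=(80, 80))
for (x, y, w, h) in faces:
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
cv2.imshow('faces', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
```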