基於 Yolov3-tiny 與 dlib 的人臉辨識

# 基於 Yolov3-tiny 與 dlib 的人臉辨識辨識原理是先用yolo框臉部的範圍，再利用python dlib進行特徵點的抓取，最後取左臉邊界(2)、右臉邊界(16)、眉心(21,22)、下巴邊界(8)以及鼻子(30)座標，計算頭部水平、垂直旋轉量。\ \ 以下用區段的方式介紹程式碼，先附上完整程式碼以及結果圖\ 程式碼： ```python= import numpy as np import cv2 import dlib import time import math net = cv2.dnn.readNetFromDarknet("/home/bmeimage/Desktop/eBrain/face_detection/weight/face-yolov3-tiny.cfg", "/home/bmeimage/Desktop/eBrain/face_detection/weight/face-yolov3-tiny_41000.weights") #讀取模型 layer_names = net.getLayerNames() output_layers = [] for i in net.getUnconnectedOutLayers(): output_layers.append(layer_names[i-1]) predictor = dlib.shape_predictor("/home/bmeimage/Desktop/eBrain/face_detection/weight//shape_predictor_68_face_landmarks.dat") detector = dlib.get_frontal_face_detector() start = 0 end = 0 cap=cv2.VideoCapture(0) x = 0 y = 0 w = 0 h = 0 nose = None left = None right = None up = None down = None xangle = 0 yangle =0 while(True): state = False start = time.time() ret,img =cap.read() img=cv2.flip(img,1) img1=img.copy() #cv2.imshow("RAW", img1) height, width, channels = img.shape blob = cv2.dnn.blobFromImage(img, 1/255.0, (600, 600), (0, 0, 0), True, crop = False) net.setInput(blob) outs = net.forward(output_layers) confidences = [] boxes = [] for out in outs: for detection in out : tx, ty, tw, th, confidence = detection[0:5] scores = detection[5:] if confidence > 0.3: state = True center_x = int(tx*width) center_y = int(ty*height) w = int(tw*width)+10 h = int(th*height)+10 x = int(center_x - w/2)-5 y = int(center_y - h/2)-2 boxes.append([x, y, w, h]) confidences.append(float(confidence)) if state: crop_img = img[y:y+h, x:x+w] #cv2.imshow("crop image",crop_img) dets = detector(crop_img,1) for det in dets: landmark=[] for p in predictor(crop_img,det).parts(): landmark.append(np.matrix([p.x,p.y])) for idx, point in enumerate(landmark): pos = (point[0, 0]+x, point[0, 1]+y) #cv2.circle(img, pos, 5, (0,0,255),-1) if(idx == 30): #nose nose = pos cv2.circle(img, pos, 5, (0,0,255),-1) elif(idx == 1): left = pos cv2.circle(img, pos, 5, (0,0,255),-1) elif(idx == 15): right = pos cv2.circle(img, pos, 5, (0,0,255),-1) elif(idx == 21): up = pos cv2.circle(img, pos, 2, (255,255,255),-1) elif(idx == 22): up = (int((pos[0]+up[0])/2),int((pos[1]+up[1])/2)) cv2.circle(img, pos, 2, (255,255,255),-1) cv2.circle(img, up, 5, (0,0,255),-1) elif(idx == 8): down = pos cv2.circle(img, pos, 5, (0,0,255),-1) else: cv2.circle(img, pos, 2, (255,255,255),-1) end=time.time() cv2.rectangle(img, (0,0), (190, 109), (50,50,50), -1) cv2.putText(img, " Webcam frame: "+str(cap.get(cv2.CAP_PROP_FPS)), (0, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1, cv2.LINE_AA) cv2.putText(img, " Real frame: "+str(1//(end-start)), (0, 43), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (50, 50, 255), 1, cv2.LINE_AA) if(nose != None and left != None and right != None and state == True): cv2.line(img, nose, left, (0, 0, 255), 1) cv2.line(img, nose, right, (0, 0, 255), 1) print(nose[0]-left[0],right[0]-nose[0],end=" ") if((nose[0]-left[0])>(right[0]-nose[0])): #print("Right") angle = 90 - math.degrees(math.acos(1-((2*abs(right[0]-nose[0]))/(right[0]-left[0])))) elif((nose[0]-left[0])<(right[0]-nose[0])): #print("Left") angle = -1 *(90 - math.degrees(math.acos(1-((2*abs(nose[0]-left[0]))/(right[0]-left[0]))))) else: angle = 0 print(angle) cv2.putText(img, " Angle(x): " + str(round(angle,2)), (0, 66), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1, cv2.LINE_AA) else: cv2.putText(img, " Angle(x): None", (0, 66), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA) if(nose != None and up != None and down != None and state == True): cv2.line(img, nose, up, (0, 0, 255), 1) cv2.line(img, nose, down, (0, 0, 255), 1) print(nose[1]-up[1],down[1]-nose[1],end=" ") if((nose[1]-up[1])>(down[1]-nose[1])): #print("up") angle = -1 * (90 - math.degrees(math.acos(1-((2*abs(down[1]-nose[1]))/(down[1]-up[1]))))) elif((nose[1]-up[1])<(down[1]-nose[1])): #print("down") angle = 90 - math.degrees(math.acos(1-((2*abs(nose[1]-up[1]))/(down[1]-up[1])))) else: angle = 0 print(angle) cv2.putText(img, " Angle(y): " + str(round(angle,2)), (0, 89), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1, cv2.LINE_AA) else: cv2.putText(img, " Angle(y): None", (0, 89), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA) indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.3, 0.4) for i in range(len(boxes)) : if i in indexes : x, y, w, h = boxes[i] label = "face"+" "+str(int(confidences[i]*100))+"%" color = (255,255,255) cv2.rectangle(img, (x, y), (x+w, y+h), color, 1) cv2.putText(img, label, (x, y-5), cv2.FONT_HERSHEY_PLAIN, 1, color, 2) cv2.imshow("main", img) key=cv2.waitKey(1) if key==ord('q') or key==ord('Q') or key==27: break cap.release() cv2.destroyAllWindows() ``` 結果運行： ![2024-01-02_04-25-14 (6)](https://hackmd.io/_uploads/rkYR5NBda.gif) ---- # 辨識人臉 **首先先引入所需要的函式庫**：\ 包含讀取Yolo模型的cv2(opencv)和讀取特徵點的dlib ```python= import numpy as np import cv2 import dlib import time import math ... ``` \ **接著輸入模型檔、模型組態以及輸出規格**：\ `face-yolov3-tiny.cfg` 是 yolov3-tiny 的組態檔，與模型架構相關\ `face-yolov3-tiny_41000.weights` 是yolov3-tiny 的權重檔，與模型中神經元計算權重相關，檔名中 `41000` 代表該權重係數訓練41000次 ```python= ... net = cv2.dnn.readNetFromDarknet("/home/bmeimage/Desktop/eBrain/face_detection/weight/face-yolov3-tiny.cfg", "/home/bmeimage/Desktop/eBrain/face_detection/weight/face-yolov3-tiny_41000.weights") #讀取模型 layer_names = net.getLayerNames() output_layers = [] for i in net.getUnconnectedOutLayers(): output_layers.append(layer_names[i-1]) #輸出規格 predictor = dlib.shape_predictor("/home/bmeimage/Desktop/eBrain/face_detection/weight//shape_predictor_68_face_landmarks.dat") detector = dlib.get_frontal_face_detector() ... ``` \ **變數的初始化以及取得webcam物件**：\ `start` 和 `end` 是計算辨識效率相關的變數\ `cap`是webcam物件，參數 `0` 大多是內建鏡頭， `1` 數字以上皆是外接鏡頭\ `nose`、`left`、`right`、`up`、`down`、`xangle`、`yangle` 是計算頭部轉向相關的變數(可以先忽略)\ `x`、`y`、`w`、`h` 是臉部範圍變數 ```python= ... start = 0 end = 0 cap=cv2.VideoCapture(0) x = 0 y = 0 w = 0 h = 0 nose = None left = None right = None up = None down = None xangle = 0 yangle =0 ... ``` \ **接著進入執行的迴圈內**：\ 先辨識臉部範圍後，將圖片裁切留下臉部，接著辨識特徵點。\ 由鏡頭物件讀入影像，對影像做預處裡後，輸入到模型中\ 輸出得出辨識的臉部範圍：`x` `y`是框線左上的座標(x,y)，`w` `h`分別是框選的長、寬\ `confidences` 則是辨識的信心指數(0.8表示80%信心框選目標在框選區域內) ```python= ... ret,img =cap.read() img=cv2.flip(img,1) img1=img.copy() #cv2.imshow("RAW", img1) height, width, channels = img.shape blob = cv2.dnn.blobFromImage(img, 1/255.0, (600, 600), (0, 0, 0), True, crop = False) net.setInput(blob) #將影像輸入模型 outs = net.forward(output_layers) confidences = [] boxes = [] for out in outs: for detection in out : tx, ty, tw, th, confidence = detection[0:5] scores = detection[5:] if confidence > 0.3: state = True center_x = int(tx*width) center_y = int(ty*height) w = int(tw*width)+10 h = int(th*height)+10 x = int(center_x - w/2)-5 y = int(center_y - h/2)-2 boxes.append([x, y, w, h]) confidences.append(float(confidence)) ... ``` \ 若以上人臉辨識成立，即接著辨識特徵點\ 將上述辨識的框選範圍對影像裁切，減少對於特徵點辨識的計算量以及錯誤判別\ `detector` 取得正臉後，`predictor` 取得臉部的特徵點(landmark)\ 再根據特徵點的編號分別取得左臉邊界(1)、右臉邊界(15)、眉心(21,22)、下巴邊界(8)以及鼻子(30)座標，像是圖中顯示： ![未命名](https://hackmd.io/_uploads/Sy6cXYSu6.png) ```python= ... if state: crop_img = img[y:y+h, x:x+w] #cv2.imshow("crop image",crop_img) dets = detector(crop_img,1) for det in dets: landmark=[] for p in predictor(crop_img,det).parts(): landmark.append(np.matrix([p.x,p.y])) for idx, point in enumerate(landmark): pos = (point[0, 0]+x, point[0, 1]+y) #cv2.circle(img, pos, 5, (0,0,255),-1) if(idx == 30): #nose nose = pos cv2.circle(img, pos, 5, (0,0,255),-1) elif(idx == 1): left = pos cv2.circle(img, pos, 5, (0,0,255),-1) elif(idx == 15): right = pos cv2.circle(img, pos, 5, (0,0,255),-1) elif(idx == 21): up = pos cv2.circle(img, pos, 2, (255,255,255),-1) elif(idx == 22): up = (int((pos[0]+up[0])/2),int((pos[1]+up[1])/2)) cv2.circle(img, pos, 2, (255,255,255),-1) cv2.circle(img, up, 5, (0,0,255),-1) elif(idx == 8): down = pos cv2.circle(img, pos, 5, (0,0,255),-1) else: cv2.circle(img, pos, 2, (255,255,255),-1) ... ``` 完整參考官方Sample Code：[face-landmark-detection](http://dlib.net/face_landmark_detection.py.html)\ \ **註：後面就是計算頭部轉動的角度，這邊就不贅述*\ \ \ 最後就是脫離迴圈的條件\ 預設是讓與使用者按下`q`、`Q`、`ESC`可以跳出迴圈 ```python= ... key=cv2.waitKey(1) if key==ord('q') or key==ord('Q') or key==27: break ... ``` \ **迴切結束後，程式終止** 將鏡頭物件釋出，並且關閉視窗(若未關閉會導致程式崩潰) ```python= ... cap.release() cv2.destroyAllWindows() ```