# OpenVINO face recognition

###### tags: `Intel-Movidius`

:::danger
**Warning**: None of the programs in this note can be executed directly.
:::

## Recognition flow

1. Find the face
2. Calculate keypoints for aligning the face
3. Align the face
4. Calculate the face feature and compare it with the ground truth

## Model

Three models are required:

1. face detection model - detects faces
2. face landmark model - calculates keypoints
3. face recognition model - identifies the face

#### Face detection

In the official demo [1], two models are recommended: `face-detection-retail-0004` and `face-detection-adas-0001`. The former is based on SqueezeNet + SSD and the latter is based on MobileNet. We will use `face-detection-adas-0001` here.

```shell=1
sudo python3 downloader.py --name face-detection-adas-0001
```

[face-detection-adas-0001](https://github.com/opencv/open_model_zoo/tree/master/models/intel/face-detection-adas-0001)

#### Face landmark

Two models cover this task: `facial-landmarks-35-adas-0002` and `landmarks-regression-retail-0009`. The former generates 35 keypoints and the latter generates 5. According to [1], `landmarks-regression-retail-0009` is recommended.

```shell=1
sudo python3 downloader.py --name landmarks-regression-retail-0009
```

[landmarks-regression-retail-0009](https://github.com/opencv/open_model_zoo/tree/master/models/intel/landmarks-regression-retail-0009)

#### Face recognition

This model computes the facial embedding features. We will use `face-reidentification-retail-0095` here.

```shell=1
sudo python3 downloader.py --name face-reidentification-retail-0095
```

[face-reidentification-retail-0095](https://github.com/opencv/open_model_zoo/tree/master/models/intel/face-reidentification-retail-0095)

## Face detection program

![](https://i.imgur.com/mhH8djq.jpg)
![](https://i.imgur.com/uJVrGR7.jpg)

```python=1
import cv2
import numpy as np
from openvino.inference_engine import IENetwork, IECore

model_name = 'face-detection-adas-0001'
path = "/opt/intel/openvino_2020.2.120/deployment_tools/open_model_zoo/tools/downloader/intel/face-detection-adas-0001/FP16/"

ie = IECore()                                                   # Create the Inference Engine core
net = ie.read_network(model=path+model_name+'.xml', weights=path+model_name+'.bin')  # Read IR model
input_blob = next(iter(net.inputs))                             # Get input name
out_blob = next(iter(net.outputs))                              # Get output name
batch, channel, height, width = net.inputs[input_blob].shape    # Get input shape

print("Load IR to device")
exec_net = ie.load_network(network=net, device_name='MYRIAD')   # Load IR model to device
print("Start")

cap = cv2.VideoCapture(1)
while True:
    ret, image = cap.read()
    cv2.imshow("input", image)

    # Read image and manipulate
    ori_shape = image.shape                     # Record original size
    image = cv2.resize(image, (width, height))  # Resize to network input size
    t_image = image.transpose((2, 0, 1))        # Change data layout from HWC to CHW

    """Infer!!!"""
    res = exec_net.infer(inputs={input_blob: t_image})  # Inference

    for i in range(res['detection_out'].shape[2]):
        # res[0][0][index] -> [image_id, label, conf, x_min, y_min, x_max, y_max]
        out = res['detection_out'][0][0][i]
        if out[2] > 0.5:
            x_l = int(width * out[3])
            y_l = int(height * out[4])
            x_u = int(width * out[5])
            y_u = int(height * out[6])
            cv2.rectangle(image, (x_l, y_l), (x_u, y_u), (0, 255, 0), 2)

    image = cv2.resize(image, (ori_shape[1], ori_shape[0]))
    cv2.imshow("output", image)
    #cv2.imwrite('detect_face.jpg', image)
    print(res['detection_out'][0][0][0])
    if (cv2.waitKey(1) & 0xFF) == 27:  # Press Esc to quit
        break

cap.release()
cv2.destroyAllWindows()
```
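The detection output only gives normalized box corners, so the later landmark and alignment steps need the face cropped out of the original frame. Below is a minimal sketch of that step (it is not part of the original demo); `crop_face` is a hypothetical helper, and `detection` is assumed to be one row of `res['detection_out'][0][0]`.

```python=1
def crop_face(frame, detection, conf_ths=0.5):
    """Crop one detected face out of the original BGR frame.

    `detection` is assumed to be one row of res['detection_out'][0][0]:
    [image_id, label, conf, x_min, y_min, x_max, y_max], with the box
    corners given in normalized [0, 1] coordinates.
    """
    if detection[2] < conf_ths:
        return None                               # Skip low-confidence boxes
    h, w = frame.shape[:2]
    x_min, y_min = max(int(w * detection[3]), 0), max(int(h * detection[4]), 0)
    x_max, y_max = min(int(w * detection[5]), w), min(int(h * detection[6]), h)
    return frame[y_min:y_max, x_min:x_max]        # The cropped face region
```

The returned crop is what the later sections call `face_area`, and its shape is `face_shape`.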
## Face landmark program

![](https://i.imgur.com/Vfs98T1.jpg)
![](https://i.imgur.com/vQGEmUb.jpg)

```python=1
import cv2
import numpy as np
from openvino.inference_engine import IENetwork, IECore

model_name = 'landmarks-regression-retail-0009'
path = "/opt/intel/openvino_2020.2.120/deployment_tools/open_model_zoo/tools/downloader/intel/landmarks-regression-retail-0009/FP16/"

ie = IECore()                                                   # Create the Inference Engine core
net = ie.read_network(model=path+model_name+'.xml', weights=path+model_name+'.bin')  # Read IR model
input_blob = next(iter(net.inputs))                             # Get input name
out_blob = next(iter(net.outputs))                              # Get output name
batch, channel, height, width = net.inputs[input_blob].shape    # Get input shape
exec_net = ie.load_network(network=net, device_name='MYRIAD')   # Load IR model to device

image = cv2.imread('detect_face.jpg')
cv2.imshow("input", image)

# Read image and manipulate
ori_shape = image.shape                     # Record original size
image = cv2.resize(image, (width, height))  # Resize to network input size
t_image = image.transpose((2, 0, 1))        # Change data layout from HWC to CHW

"""Infer!!!"""
res = exec_net.infer(inputs={input_blob: t_image})  # Inference

# Save the keypoints (normalized x/y pairs) to a file
feature = np.array([[x, y] for (x, y) in zip(res['95'][0, 0::2, 0, 0], res['95'][0, 1::2, 0, 0])])
np.save('truth.npy', feature)

# Draw the keypoints on the image
for (x, y) in zip(res['95'][0, 0::2, 0, 0], res['95'][0, 1::2, 0, 0]):
    target_x = int(width * x)
    target_y = int(height * y)
    cv2.circle(image, (target_x, target_y), 2, (0, 255, 0))

image = cv2.resize(image, (ori_shape[1], ori_shape[0]))
cv2.imshow("output", image)
cv2.imwrite('detect_keypoint_5.jpg', image)
cv2.waitKey(0)
cv2.destroyAllWindows()
```

## Face alignment

We need an affine transformation to map the regressed keypoints onto the reference ones. A face image is aligned if the five keypoints (left eye, right eye, tip of nose, left lip corner, right lip corner) are located at the following points in normalized coordinates [0,1]x[0,1]: [(0.31556875000000000, 0.4615741071428571), (0.68262291666666670, 0.4615741071428571), (0.50026249999999990, 0.6405053571428571), (0.34947187500000004, 0.8246919642857142), (0.65343645833333330, 0.8246919642857142)] ----Provided by Intel

![](https://i.imgur.com/5jJNZMp.jpg)
![](https://i.imgur.com/T4K8HAi.jpg)

```python=1
# face_shape is the shape of the face area that the landmark network was run on.
landmark_reference = np.float32([[0.31556875000000000, 0.4615741071428571],
                                 [0.68262291666666670, 0.4615741071428571],
                                 [0.50026249999999990, 0.6405053571428571],
                                 [0.34947187500000004, 0.8246919642857142],
                                 [0.65343645833333330, 0.8246919642857142]])
landmark_ref = landmark_reference * np.float32([face_shape[1], face_shape[0]])  # Reference points in pixels

# res here is the output of the landmark network
landmark = np.float32([[x, y] for (x, y) in zip(res['95'][0, 0::2, 0, 0], res['95'][0, 1::2, 0, 0])]) * np.float32([face_shape[1], face_shape[0]])

# Get the transformation matrix from the first three point pairs
M = cv2.getAffineTransform(landmark[0:3], landmark_ref[0:3])
dst = cv2.warpAffine(face_area, M, (face_shape[1], face_shape[0]))
cv2.imshow("transform", dst)
cv2.waitKey(1)
```

Maybe we should not use the cropped image for the transformation.
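If the three-point affine transform turns out to be unstable, one alternative (not from the official demo, so treat it as an assumption) is to estimate a similarity transform from all five keypoint pairs with `cv2.estimateAffinePartial2D`. A minimal sketch, reusing `landmark`, `landmark_ref`, `face_area`, and `face_shape` from the snippet above:

```python=1
# Least-squares similarity transform fitted to all five keypoint pairs
M, _ = cv2.estimateAffinePartial2D(landmark, landmark_ref)
aligned = cv2.warpAffine(face_area, M, (face_shape[1], face_shape[0]))  # Warp the face crop
cv2.imshow("aligned", aligned)
cv2.waitKey(1)
```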
## Face recognition

![](https://i.imgur.com/kNzCE1x.png)

```python=1
import cv2
import numpy as np
from openvino.inference_engine import IENetwork, IECore

model_name = 'face-reidentification-retail-0095'
path = "/opt/intel/openvino_2020.2.120/deployment_tools/open_model_zoo/tools/downloader/intel/face-reidentification-retail-0095/FP16/"

ie = IECore()                                                   # Create the Inference Engine core
net = ie.read_network(model=path+model_name+'.xml', weights=path+model_name+'.bin')  # Read IR model
input_blob = next(iter(net.inputs))                             # Get input name
out_blob = next(iter(net.outputs))                              # Get output name
batch, channel, height, width = net.inputs[input_blob].shape    # Get input shape
exec_net = ie.load_network(network=net, device_name='MYRIAD')   # Load IR model to device

ground_truth_image = cv2.imread('truth.jpg')
ground_truth_feature = np.load('truth.npy')                     # Ground truth face feature (the embedding of the enrolled face)
ground_truth_length = np.linalg.norm(ground_truth_feature)      # The length of the feature vector
cosine_similarity_ths = 0.4                                     # Cosine similarity threshold

""" Inference """
test = cv2.imread('test.jpg')
""" Perform face detection, landmark calculation and alignment on `test` here to get `align_face` """
image = cv2.resize(align_face, (width, height))                 # Resize the aligned face to the network input size
t_image = image.transpose((2, 0, 1))                            # Change data layout from HWC to CHW
res = exec_net.infer(inputs={input_blob: t_image})              # Inference

feature = res['658'][0, :, 0, 0]                                # Face embedding of the test face
test_length = np.linalg.norm(feature)
similarity = np.dot(ground_truth_feature, feature) / (ground_truth_length * test_length)  # Cosine similarity
if similarity > cosine_similarity_ths:
    # (x_l, y_l) is the upper-left corner of the detected face from the detection step
    cv2.putText(test, 'truth', (x_l, y_l), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 1, cv2.LINE_AA)
```

## Reference

[1] [OpenVINO face recognition demo](https://docs.openvinotoolkit.org/latest/_demos_python_demos_face_recognition_demo_README.html)