# OpenVINO face recognition
###### tags: `Intel-Movidius`
:::danger
**Warning**: None of the programs in this note can be executed directly.
:::
## Recognition flow
1. Find the face
2. Calculate keypoints to align the face
3. Face Alignment
4. Calculate the face feature and compare it with the ground truth
## Model
Three models are required:
1. face detection model - to detect face
2. face landmark model - to calculate keypoints
3. face recognition model - to identify
#### Face detection
In the official demo [1], two models are recommended: ```face-detection-retail-0004``` and ```face-detection-adas-0001```. The former is based on SqueezeNet + SSD and the latter is based on MobileNet. We will use ```face-detection-adas-0001``` here.
```SHELL=1
# Fetch the face-detection-adas-0001 model with the Open Model Zoo downloader script
sudo python3 downloader.py --name face-detection-adas-0001
```
[face-detection-adas-0001](https://github.com/opencv/open_model_zoo/tree/master/models/intel/face-detection-adas-0001)
#### Face landmark
Two models are available for this task: ```facial-landmarks-35-adas-0002``` and ```landmarks-regression-retail-0009```. The former generates 35 keypoints and the latter generates 5.
According to [1], ```landmarks-regression-retail-0009``` is recommended.
```SHELL=1
# Fetch the landmarks-regression-retail-0009 model with the Open Model Zoo downloader script
sudo python3 downloader.py --name landmarks-regression-retail-0009
```
[landmarks-regression-retail-0009](https://github.com/opencv/open_model_zoo/tree/master/models/intel/landmarks-regression-retail-0009)
#### Face recognition
These models compute facial embedding features. We will use ```face-reidentification-retail-0095``` here.
```SHELL=1
# Fetch the face-reidentification-retail-0095 model with the Open Model Zoo downloader script
sudo python3 downloader.py --name face-reidentification-retail-0095
```
[face-reidentification-retail-0095](https://github.com/opencv/open_model_zoo/tree/master/models/intel/face-reidentification-retail-0095)
## Face detection program
![](https://i.imgur.com/mhH8djq.jpg) ![](https://i.imgur.com/uJVrGR7.jpg)
```python=1
import cv2
import numpy as np
from openvino.inference_engine import IENetwork, IECore

# Live face detection: read frames from a camera, run each one through
# face-detection-adas-0001 on a MYRIAD device and draw a box around every
# detection whose confidence exceeds the threshold. Press Esc to quit.
model_name = 'face-detection-adas-0001'
path = "/opt/intel/openvino_2020.2.120/deployment_tools/open_model_zoo/tools/downloader/intel/face-detection-adas-0001/FP16/"
confidence_ths = 0.5  # Minimum confidence for a detection to be drawn

ie = IECore()  # Create the Inference Engine core object
net = ie.read_network(model=path+model_name+'.xml',
                      weights=path+model_name+'.bin')  # Read IR model
input_blob = next(iter(net.inputs))  # Get input name
out_blob = next(iter(net.outputs))  # Get output name
batch, channel, height, width = net.inputs[input_blob].shape  # Get input shape
print("Load IR to device")
exec_net = ie.load_network(network=net, device_name='MYRIAD')  # Load IR model to device
print("Start")
cap = cv2.VideoCapture(1)
try:
    while True:
        ret, image = cap.read()
        if not ret:
            # No frame available (camera missing or stream ended): stop
            # instead of crashing inside imshow/resize on a None image.
            print("Failed to read a frame from the camera")
            break
        cv2.imshow("input", image)
        ori_shape = image.shape  # Record original size
        image = cv2.resize(image, (width, height))  # Resize to network input size
        t_image = image.transpose((2, 0, 1))  # Change data layout from HWC to CHW
        res = exec_net.infer(inputs={input_blob: t_image})  # Inference
        # Each row: [image_id, label, conf, x_min, y_min, x_max, y_max]
        detections = res[out_blob][0][0]
        for out in detections:
            if out[2] > confidence_ths:
                # Box corners are scaled by the network input size because the
                # boxes are drawn on the resized image.
                x_l = int(width * out[3])
                y_l = int(height * out[4])
                x_u = int(width * out[5])
                y_u = int(height * out[6])
                cv2.rectangle(image, (x_l, y_l), (x_u, y_u), (0, 255, 0), 2)
        image = cv2.resize(image, (ori_shape[1], ori_shape[0]))  # Back to the camera frame size
        cv2.imshow("output", image)
        # Uncomment to save the annotated frame:
        #cv2.imwrite('detect_face.jpg', image)
        print(detections[0])
        if 0xFF & cv2.waitKey(1) == 27:  # Esc
            break
finally:
    # Always free the camera and close the windows, even after an error
    cap.release()
    cv2.destroyAllWindows()
```
## Face landmark program
![](https://i.imgur.com/Vfs98T1.jpg) ![](https://i.imgur.com/vQGEmUb.jpg)
```python=1
import cv2
import numpy as np
from openvino.inference_engine import IENetwork, IECore

# Face landmark regression: run landmarks-regression-retail-0009 on a single
# image, save the regressed keypoints and draw them on the image.
model_name = 'landmarks-regression-retail-0009'
path = "/opt/intel/openvino_2020.2.120/deployment_tools/open_model_zoo/tools/downloader/intel/landmarks-regression-retail-0009/FP16/"
ie = IECore()  # Create the Inference Engine core object
net = ie.read_network(model=path+model_name+'.xml',
                      weights=path+model_name+'.bin')  # Read IR model
input_blob = next(iter(net.inputs))  # Get input name
out_blob = next(iter(net.outputs))  # Get output name
batch, channel, height, width = net.inputs[input_blob].shape  # Get input shape
exec_net = ie.load_network(network=net, device_name='MYRIAD')  # Load IR model to device

image = cv2.imread('detect_face.jpg')
if image is None:
    # cv2.imread returns None instead of raising when the file cannot be read
    raise FileNotFoundError("Cannot read 'detect_face.jpg'")
cv2.imshow("input", image)
ori_shape = image.shape  # Record original size
image = cv2.resize(image, (width, height))  # Resize to network input size
t_image = image.transpose((2, 0, 1))  # Change data layout from HWC to CHW
res = exec_net.infer(inputs={input_blob: t_image})  # Inference

# The output is indexed as a flat [x0, y0, x1, y1, ...] vector; pair it up
# into one [x, y] row per keypoint.
points = res[out_blob][0, :, 0, 0]
keypoints = [[x, y] for (x, y) in zip(points[0::2], points[1::2])]

# Save feature file. A list (not a generator) is required here: np.array on
# a generator yields a 0-d object array rather than an (N, 2) array.
# NOTE(review): the recognition script loads a file with this same name as a
# face embedding - confirm which data 'truth.npy' is meant to hold.
feature = np.array(keypoints)
np.save('truth.npy', feature)

# Draw on image (coordinates are scaled by the network input size)
for (x, y) in keypoints:
    target_x = int(width * x)
    target_y = int(height * y)
    cv2.circle(image, (target_x, target_y), 2, (0, 255, 0))
image = cv2.resize(image, (ori_shape[1], ori_shape[0]))  # Back to the original size
cv2.imshow("output", image)
cv2.imwrite('detect_keypoint_5.jpg', image)
cv2.waitKey(0)
cv2.destroyAllWindows()
```
## Face alignment
We use an affine transformation to map the regressed keypoints onto the reference ones.
A face image is aligned when its five keypoints (left eye, right eye, tip of nose, left lip corner, right lip corner) are located at the following points in normalized coordinates [0,1]x[0,1]:
[(0.31556875000000000, 0.4615741071428571),
(0.68262291666666670, 0.4615741071428571),
(0.50026249999999990, 0.6405053571428571),
(0.34947187500000004, 0.8246919642857142),
(0.65343645833333330, 0.8246919642857142)] — provided by Intel
![](https://i.imgur.com/5jJNZMp.jpg) ![](https://i.imgur.com/T4K8HAi.jpg)
```python=1
# Inputs expected from the surrounding pipeline:
#   face_shape - shape of the face area used to calculate the face landmarks
#   face_area  - the image that gets warped
#   res        - output of the landmark network
scale = np.float32([face_shape[1], face_shape[0]])  # (width, height) of the face area

# Reference keypoint positions in normalized coordinates, one [x, y] row each
landmark_reference = np.array(
    [
        [0.31556875000000000, 0.4615741071428571],
        [0.68262291666666670, 0.4615741071428571],
        [0.50026249999999990, 0.6405053571428571],
        [0.34947187500000004, 0.8246919642857142],
        [0.65343645833333330, 0.8246919642857142],
    ],
    dtype=np.float32,
)
landmark_ref = landmark_reference * scale

# Pair the flat network output into [x, y] rows and scale them the same way
coords = res['95'][0, :, 0, 0]
landmark = np.float32(list(zip(coords[0::2], coords[1::2]))) * scale

# Transformation matrix from the first three regressed keypoints to the
# first three reference keypoints
M = cv2.getAffineTransform(landmark[:3], landmark_ref[:3])
dst = cv2.warpAffine(face_area, M, (face_shape[1], face_shape[0]))
cv2.imshow("transform", dst)
cv2.waitKey(1)
```
Maybe we should not use the cropped image for the transformation.
## Face recognition
![](https://i.imgur.com/kNzCE1x.png)
```python=1
import cv2
import numpy as np
from openvino.inference_engine import IENetwork, IECore

# Face recognition: compute the embedding of an aligned face with
# face-reidentification-retail-0095 and compare it with a stored ground-truth
# embedding using cosine similarity.
model_name = 'face-reidentification-retail-0095'
path = "/opt/intel/openvino_2020.2.120/deployment_tools/open_model_zoo/tools/downloader/intel/face-reidentification-retail-0095/FP16/"
ie = IECore()
net = ie.read_network(model=path+model_name+'.xml',
                      weights=path+model_name+'.bin')  # Read IR model
input_blob = next(iter(net.inputs))  # Get input name
out_blob = next(iter(net.outputs))  # Get output name
batch, channel, height, width = net.inputs[input_blob].shape  # Get input shape
exec_net = ie.load_network(network=net, device_name='MYRIAD')  # Load IR model to device

ground_truth_image = cv2.imread('truth.jpg')
ground_truth_feature = np.load('truth.npy')  # Ground truth face feature
ground_truth_length = np.linalg.norm(ground_truth_feature)  # The length of feature
cosine_similarity_ths = 0.4  # Compare similarity threshold

# Inference
test = cv2.imread('test.jpg')
# Placeholder: perform human face detection, landmark calculation and
# alignment here. That step must define `align_face` (the aligned face image)
# and `x_l`, `y_l` (the position where the label is drawn).
image = cv2.resize(align_face, (width, height))  # Resize to network input size
t_image = image.transpose((2, 0, 1))  # Change data layout from HWC to CHW
res = exec_net.infer(inputs={input_blob: t_image})
feature = res[out_blob][0, :, 0, 0]
test_length = np.linalg.norm(feature)
# Cosine similarity between the ground-truth and the test embedding
similarity = np.dot(ground_truth_feature, feature)/(ground_truth_length * test_length)
if similarity > cosine_similarity_ths:
    # Draw on the loaded test image, not on the CHW network input, which
    # OpenCV cannot draw on.
    # NOTE(review): assumes x_l, y_l are pixel coordinates in `test` - confirm.
    cv2.putText(test, 'truth', (x_l, y_l), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 1, cv2.LINE_AA)
```
## Reference
[1][OpenVINO face recognition demo](https://docs.openvinotoolkit.org/latest/_demos_python_demos_face_recognition_demo_README.html)