OpenCV Part2 - HackMD

# OpenCV Part2 ## 螢幕手寫繪圖 OCR ```python= ``` ## 螢幕手寫繪圖 OCR + CNN 模型 ```python= ``` ## 二值化 * 目的 : 降低變化，只是要取得外觀 * 結論 : 端看要辨識的物品，如果沒有要辨識顏色或複雜細項，畫素能小就小，能二值化就二值化 ```python= ``` ### opencv + cnn 實作辨識 ```python= # 步驟1 : 模型訓練 from sklearn.model_selection import train_test_split # 匯入 sklearn 用於分割訓練和測試數據 import glob # 匯入 glob 模組，用於檔案模式匹配 import numpy as np # 匯入 NumPy 庫，用於數據處理 import os.path as path # 匯入 os.path 模組，用於處理路徑 import os # 匯入 os 模組，用於操作檔案和目錄 import cv2 # 匯入 OpenCV 庫，用於影像處理 import tensorflow as tf # 匯入 TensorFlow 庫，用於建立深度學習模型 # 定義影像所在的目錄路徑 IMAGEPATH = 'C:\\openCV\\images' # 獲取 IMAGEPATH 目錄下的所有子目錄名稱（每個子目錄代表一個類別） dirs = os.listdir(IMAGEPATH) # 初始化空列表，用於存放影像資料和標籤 X = [] Y = [] print(dirs) # 輸出子目錄名稱列表 i = 0 # 初始化標籤計數器 # 遍歷每個子目錄（每個子目錄代表一個類別） for name in dirs: # 獲取子目錄下所有影像檔案的完整路徑 file_paths = glob.glob(path.join(IMAGEPATH + "/" + name, '*.*')) # 遍歷每個影像檔案 for path3 in file_paths: img = cv2.imread(path3) # 讀取影像 img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA) # 調整影像大小至 224x224 im_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # 將影像從 BGR 轉換為 RGB 格式 X.append(im_rgb) # 將影像資料添加到 X 列表中 Y.append(i) # 將影像的標籤（對應子目錄的索引）添加到 Y 列表中 i = i + 1 # 每處理完一個子目錄，將標籤計數器加一 # 將影像資料和標籤列表轉換為 NumPy 陣列 X = np.asarray(X) Y = np.asarray(Y) # 將影像資料轉換為浮點型，並進行標準化處理 X = X.astype('float32') X = X / 255 # 將像素值歸一化至 [0, 1] 範圍 h=224 w=224 # 將影像資料重塑為 (樣本數量, 224, 224, 3)，3 代表 RGB 三個頻道 X = X.reshape(X.shape[0], h, w, 3) # 獲取分類數目，即子目錄的數量 category = len(dirs) # 獲取影像的維度 dim = X.shape[1] # 將資料集分為訓練集和測試集，測試集佔 5% x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.05) # 輸出訓練集的資料形狀 print(x_train.shape) print(y_train.shape) X = X.astype('float32') X=X/255 X=X.reshape(X.shape[0],h,w,3) category=len(dirs) dim=X.shape[1] x_train , x_test , y_train , y_test = train_test_split(X,Y,test_size=0.05) # 將數字轉為 One-hot 向量 y_train2 = tf.keras.utils.to_categorical(y_train, category) y_test2 = tf.keras.utils.to_categorical(y_test, category) # 載入資料（將資料打散，放入 train 與 test 資料集） print(x_train.shape) datagen = 
tf.keras.preprocessing.image.ImageDataGenerator( rotation_range=25 , width_shift_range=[-3,3], height_shift_range=[-3,3] , zoom_range=0.3 , data_format='channels_last') # 建立模型 model = tf.keras.models.Sequential() # 加入 2D 的 Convolution Layer，接著一層 ReLU 的 Activation 函數 model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), padding="same", activation='relu', input_shape=(h,w,3))) model.add(tf.keras.layers.Flatten()) model.add(tf.keras.layers.Dense(50, activation='relu')) model.add(tf.keras.layers.Dense(units=category, activation=tf.nn.softmax )) learning_rate = 0.001 opt1 = tf.keras.optimizers.Adam(learning_rate=learning_rate) model.compile( optimizer=opt1, loss=tf.keras.losses.categorical_crossentropy, metrics=['accuracy']) model.summary() """ with open("model_ImageDataGenerator_myImage.json", "w") as json_file: json_file.write(model.to_json()) try: with open('model_ImageDataGenerator_myImage.h5', 'r') as load_weights: model.load_weights("model_ImageDataGenerator_myImage.h5") except IOError: print("File not exists") """ #checkpoint = tf.keras.callbacks.ModelCheckpoint("model_ImageDataGenerator_myImage.weights.keras", monitor='loss', verbose=1, # save_best_only=True, mode='auto', save_freq=1) # 訓練模型 trainData=datagen.flow(x_train,y_train2,batch_size=64) history = model.fit(trainData, epochs=100, # callbacks=[checkpoint] ) #保存模型權重 model.save_weights("model_ImageDataGenerator_myImage.weights.h5") #測試 score = model.evaluate(x_test, y_test2, batch_size=128) # 輸出結果 print("score:",score) predict = model.predict(x_test) print("Ans:",np.argmax(predict[0]),np.argmax(predict[1]),np.argmax(predict[2]),np.argmax(predict[3])) # predict2 = model.predict_classes(x_test) predict2 = np.argmax(predict, axis=1) print("predict_classes:",predict2) print("y_test",y_test[:]) for t1 in predict2: print(dirs[t1]) img=x_test[0] img=img.reshape(h,w,3) img=img*255 img = img.astype('uint8') img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA) im_bgr = cv2.cvtColor(img, 
cv2.COLOR_RGB2BGR) i = np.argmax(predict[0]) str1 = dirs[i] + " " + str(predict[0][i]) print(str1) im_bgr = cv2.putText(im_bgr, str1, (10,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,0, 0), 1, cv2.LINE_AA) cv2.imshow('image', im_bgr) cv2.waitKey(0) cv2.destroyAllWindows() ``` ```python= # 步驟2 : 執行opencv 進行辨識 import tensorflow as tf import cv2 import numpy as np import os.path as path import os import time IMAGEPATH = 'C:\\openCV\\images' dirs = os.listdir(IMAGEPATH) """ json_file = open('model_ImageDataGenerator_myImage.json', 'r') loaded_model_json = json_file.read() json_file.close() model = tf.keras.models.model_from_json(loaded_model_json) """ h=224 w=224 category = len(dirs) # 建立模型 model = tf.keras.models.Sequential() # 加入 2D 的 Convolution Layer，接著一層 ReLU 的 Activation 函數 model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), padding="same", activation='relu', input_shape=(h,w,3))) model.add(tf.keras.layers.Flatten()) model.add(tf.keras.layers.Dense(50, activation='relu')) model.add(tf.keras.layers.Dense(units=category, activation=tf.nn.softmax )) model.compile(loss=tf.keras.losses.categorical_crossentropy, optimizer=tf.keras.optimizers.Adadelta(), metrics=['accuracy']) model.load_weights("model_ImageDataGenerator_myImage.weights.h5") model.summary() url="http://172.19.106.80:4747/video" cap = cv2.VideoCapture(url) # while(True): # ret, img = cap.read() # if ret==True: # resized = cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA) # image = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB) # print(image.shape) # image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2])) # predict = model.predict(image) # i=np.argmax(predict[0]) # str1 =dirs[i] +" "+str(predict[0][i]) # print(str1) # img = cv2.putText(img, str1, (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0),2,cv2.LINE_AA) # cv2.imshow('image',img) # # time.sleep(5) # if cv2.waitKey(50) & 0xFF == ord('q'): # break # cap.release() # cv2.destroyAllWindows() # start_recognition = False # 初始化為 
False，表示尚未開始辨識 # while True: # ret, img = cap.read() # if ret == True: # key = cv2.waitKey(50) & 0xFF # if key == ord('c'): # resized = cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA) # image = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB) # print(image.shape) # image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2])) # predict = model.predict(image) # i = np.argmax(predict[0]) # str1 = dirs[i] + " " + str(predict[0][i]) # print(str1) # img = cv2.putText(img, str1, (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA) # cv2.imshow('image', img) # if key == ord('q'): # break # cap.release() # cv2.destroyAllWindows() freeze_frame = False # 初始化為 False，表示畫面未凍結 while True: if not freeze_frame: # 當畫面未凍結時，讀取新的影像 ret, img = cap.read() if ret == True: key = cv2.waitKey(50) & 0xFF # 檢查是否有按鍵輸入 if key == ord('c') and not freeze_frame: # 當按下C鍵且畫面未凍結時進行辨識 resized = cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA) # 調整影像大小 image = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB) # 將影像轉換為RGB格式 print(image.shape) # 輸出影像的形狀資訊 image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2])) # 重新塑形以符合模型輸入 predict = model.predict(image) # 執行模型預測 i = np.argmax(predict[0]) # 找出預測結果中機率最高的類別 str1 = dirs[i] + " " + str(predict[0][i]) # 構建顯示的文字訊息 print(str1) # 輸出辨識結果 img = cv2.putText(img, str1, (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA) # 在影像上顯示結果 freeze_frame = True # 當按下C鍵後，凍結畫面 cv2.imshow('image', img) # 顯示影像 if key == ord('c') and freeze_frame: # 再次按下C鍵時，解除凍結，準備讀取下一幀影像 freeze_frame = False if key == ord('q'): break # 按下Q鍵結束程式 cap.release() # 釋放攝影機資源 cv2.destroyAllWindows() # 關閉所有OpenCV視窗 ``` ![image](https://hackmd.io/_uploads/Hk-hCZmjR.png) ![image](https://hackmd.io/_uploads/BkR00bXjC.png) ## 多物件影像辨識 ### HSL HSV * 目的把顏色與物體拆分 > 多物件 > ![image](https://hackmd.io/_uploads/H1t7AAzsC.png) 參數不會調整，主要是返回值 > 找特徵物件 > 只要是黑色或是白色的邊緣都可以找到輪廓 > 如果是在流水線上面的檢驗，盡量讓背景是差異很大，例如動畫綠背景，就是為了這個目的 > 
![image](https://hackmd.io/_uploads/S1B2zyQjR.png) >算面積 >![image](https://hackmd.io/_uploads/HJBnQ1XjC.png) > 寫字 > 框起來後，可以透過程式加上即時調整參數的方式，找到最好的參數 ```python= # part1 #!/usr/bin/env python # author: Powen Ko 柯博文老師 www.powenko.com # -*- coding: utf-8 -*- from sklearn.model_selection import train_test_split # 匯入 sklearn 用於分割訓練和測試數據 import glob # 匯入 glob 模組，用於檔案模式匹配 import numpy as np # 匯入 NumPy 庫，用於數據處理 import os.path as path # 匯入 os.path 模組，用於處理路徑 import os # 匯入 os 模組，用於操作檔案和目錄 import cv2 # 匯入 OpenCV 庫，用於影像處理 import tensorflow as tf # 匯入 TensorFlow 庫，用於建立深度學習模型 # 定義影像所在的目錄路徑 IMAGEPATH = 'images' # 獲取 IMAGEPATH 目錄下的所有子目錄名稱（每個子目錄代表一個類別） dirs = os.listdir(IMAGEPATH) # 初始化空列表，用於存放影像資料和標籤 X = [] Y = [] print(dirs) # 輸出子目錄名稱列表 i = 0 # 初始化標籤計數器 # 遍歷每個子目錄（每個子目錄代表一個類別） for name in dirs: # 獲取子目錄下所有影像檔案的完整路徑 file_paths = glob.glob(path.join(IMAGEPATH + "/" + name, '*.*')) # 遍歷每個影像檔案 for path3 in file_paths: img = cv2.imread(path3) # 讀取影像 img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA) # 調整影像大小至 224x224 im_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # 將影像從 BGR 轉換為 RGB 格式 X.append(im_rgb) # 將影像資料添加到 X 列表中 Y.append(i) # 將影像的標籤（對應子目錄的索引）添加到 Y 列表中 i = i + 1 # 每處理完一個子目錄，將標籤計數器加一 # 將影像資料和標籤列表轉換為 NumPy 陣列 X = np.asarray(X) Y = np.asarray(Y) # 將影像資料轉換為浮點型，並進行標準化處理 X = X.astype('float32') X = X / 255 # 將像素值歸一化至 [0, 1] 範圍 h = 224 # 設定影像的高度 w = 224 # 設定影像的寬度 # 將影像資料重塑為 (樣本數量, 224, 224, 3)，3 代表 RGB 三個頻道 X = X.reshape(X.shape[0], h, w, 3) # 獲取分類數目，即子目錄的數量 category = len(dirs) # 獲取影像的維度 dim = X.shape[1] # 將資料集分為訓練集和測試集，測試集佔 5% x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.05) # 輸出訓練集的資料形狀 print(x_train.shape) print(y_train.shape) # 再次將影像資料轉換為浮點型，並進行標準化處理（多餘的步驟，已在上面處理過） X = X.astype('float32') X = X / 255 # 將像素值歸一化至 [0, 1] 範圍 X = X.reshape(X.shape[0], h, w, 3) # 重塑影像資料為 (樣本數量, 224, 224, 3) category = len(dirs) # 獲取分類數目 dim = X.shape[1] # 獲取影像的維度 x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.05) # 分割資料集 # 將數字標籤轉為 One-hot 向量 y_train2 = 
tf.keras.utils.to_categorical(y_train, category) y_test2 = tf.keras.utils.to_categorical(y_test, category) # 建立資料增強器，用於在訓練過程中進行影像增強 datagen = tf.keras.preprocessing.image.ImageDataGenerator( rotation_range=25, # 影像隨機旋轉角度 width_shift_range=[-3, 3], # 影像隨機水平平移範圍 height_shift_range=[-3, 3], # 影像隨機垂直平移範圍 zoom_range=0.3, # 影像隨機縮放範圍 data_format='channels_last' # 指定資料格式為 channels_last (即 (height, width, channels)) ) # 建立模型 model = tf.keras.models.Sequential() # 加入 2D 的 Convolution Layer，使用 ReLU 激活函數 model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), padding="same", activation='relu', input_shape=(h, w, 3))) # 將多維度的輸出轉為一維 model.add(tf.keras.layers.Flatten()) # 加入全連接層，包含 50 個神經元，使用 ReLU 作為激活函數 model.add(tf.keras.layers.Dense(50, activation='relu')) # 最後一層使用 softmax 激活函數進行分類，輸出單元數與分類數相同 model.add(tf.keras.layers.Dense(units=category, activation=tf.nn.softmax)) # 設定學習率 learning_rate = 0.001 opt1 = tf.keras.optimizers.Adam(learning_rate=learning_rate) # 編譯模型，使用 categorical_crossentropy 作為損失函數，評估指標為準確率 model.compile( optimizer=opt1, loss=tf.keras.losses.categorical_crossentropy, metrics=['accuracy']) # 輸出模型摘要資訊 model.summary() # 準備訓練資料 trainData = datagen.flow(x_train, y_train2, batch_size=64) # 訓練模型 history = model.fit(trainData, epochs=100) # 設定訓練的迭代次數為 100 次 # 保存模型權重 model.save_weights("model_ImageDataGenerator_myImage.weights.h5") # 測試模型 score = model.evaluate(x_test, y_test2, batch_size=128) # 輸出結果 print("score:", score) # 進行預測 predict = model.predict(x_test) print("Ans:", np.argmax(predict[0]), np.argmax(predict[1]), np.argmax(predict[2]), np.argmax(predict[3])) # 將預測結果轉為標籤類別 predict2 = np.argmax(predict, axis=1) print("predict_classes:", predict2) print("y_test", y_test[:]) for t1 in predict2: print(dirs[t1]) # 顯示測試影像及其預測結果 img = x_test[0] # 取出第一張測試影像 img = img.reshape(h, w, 3) # 重塑影像為原本形狀 img = img * 255 # 將歸一化的影像像素值還原 img = img.astype('uint8') # 轉換資料型別為 unsigned 8-bit integer img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA) # 調整影像大小 im_bgr 
= cv2.cvtColor(img, cv2.COLOR_RGB2BGR) # 將影像從 RGB 轉換回 BGR 格式（OpenCV 使用 BGR） i = np.argmax(predict[0]) # 獲取預測結果的最大值索引 str1 = dirs[i] + " " + str(predict[0][i]) # 組合顯示文字 print(str1) im_bgr = cv2.putText(im_bgr, str1, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 1, cv2.LINE_AA) # 在影像上顯示文字 cv2.imshow('image', im_bgr) # 顯示影像 cv2.imshow('image', im_bgr) # 顯示影像窗口 cv2.waitKey(0) # 等待鍵盤輸入，防止窗口自動關閉 cv2.destroyAllWindows() # 關閉所有 OpenCV 窗口 ``` ```python= #part2 #!/usr/bin/env python # -*- coding=utf-8 -*- __author__ = "柯博文老師 Powen Ko, www.powenko.com" import numpy as np import cv2 import os import tensorflow as tf # 定義影像所在的目錄路徑 IMAGEPATH = 'images' # 獲取 IMAGEPATH 目錄下的所有子目錄名稱（每個子目錄代表一個類別） dirs = os.listdir(IMAGEPATH) # 設定影像高度和寬度 h = 224 w = 224 # 獲取分類數目，即子目錄的數量 category = len(dirs) # 建立模型 model = tf.keras.models.Sequential() # 加入 2D 的 Convolution Layer，接著一層 ReLU 的 Activation 函數 model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), padding="same", activation='relu', input_shape=(h, w, 3))) # 將多維度的輸出轉為一維 model.add(tf.keras.layers.Flatten()) # 加入全連接層，包含 50 個神經元，使用 ReLU 作為激活函數 model.add(tf.keras.layers.Dense(50, activation='relu')) # 最後一層使用 softmax 激活函數進行分類，輸出單元數與分類數相同 model.add(tf.keras.layers.Dense(units=category, activation=tf.nn.softmax )) # 設定學習率 learning_rate = 0.001 opt1 = tf.keras.optimizers.Adam(learning_rate=learning_rate) # 編譯模型，使用 categorical_crossentropy 作為損失函數，評估指標為準確率 model.compile( optimizer=opt1, loss=tf.keras.losses.categorical_crossentropy, metrics=['accuracy']) # 輸出模型摘要資訊 model.summary() # 讀取已訓練的權重 model.load_weights("model_ImageDataGenerator_myImage.weights.h5") # 定義 CNN 模型的預測函數 def CNNPredict(img): # 調整影像大小為模型要求的大小 resized = cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA) # 轉換影像顏色為 RGB image = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB) # 重塑影像為符合模型輸入格式的四維陣列 image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2])) # 使用模型進行預測 predict2 = model.predict(image) return predict2 # 初始化計數器與影像寬度 counter = 0 w2 = 640 # 
定義調整面積閾值的函數 def update_area_threshold(val): global area_threshold area_threshold = val # 定義調整 threshold 閾值的函數 def update_threshold(val): global threshold_value threshold_value = val # 初始化 OpenCV 視訊捕捉裝置 cap = cv2.VideoCapture(0) # 建立 OpenCV 視窗與 trackbar cv2.namedWindow('Objects Detected') cv2.createTrackbar('Area Threshold', 'Objects Detected', 2000, 40000, update_area_threshold) cv2.createTrackbar('Threshold', 'Objects Detected', 0, 255, update_threshold) # 初始化閾值 area_threshold = 2236 threshold_value = 46 while(True): # 讀取攝影機畫面 ret, rawImage = cap.read() if ret == True: # 調整影像大小，保持寬度為 w2，且保持比例 resize = cv2.resize(rawImage, (w2, int(rawImage.shape[0] * w2 / rawImage.shape[1])), interpolation=cv2.INTER_AREA) # 將影像從 BGR 轉為 HSV 色彩空間 hsv = cv2.cvtColor(resize, cv2.COLOR_BGR2HSV) # 分割 HSV 影像的色調、飽和度與明度 hue, saturation, value = cv2.split(hsv) # 進行二值化處理，使用 OTSU 方法自動決定最佳 threshold retval, thresholded = cv2.threshold(saturation, threshold_value, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) cv2.imshow('thresholded', thresholded) # 使用中值濾波來去除噪點 medianFiltered = cv2.medianBlur(thresholded, 5) # 找出影像中的輪廓 contours, hierarchy = cv2.findContours(medianFiltered, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) contour_list = [] # 遍歷所有輪廓，並將面積大於指定值的輪廓加入列表 for contour in contours: area = cv2.contourArea(contour) if area > area_threshold: contour_list.append(contour) # 遍歷符合條件的輪廓，進行影像分類預測 for contour1 in contour_list: contour_list_x = [] contour_list_y = [] # 取得輪廓的所有座標點 for xy in contour1: contour_list_x.append(xy[0][0]) contour_list_y.append(xy[0][1]) # 找出輪廓的最小與最大座標點 x = x1 = min(contour_list_x) y = y1 = min(contour_list_y) x2 = max(contour_list_x) y2 = max(contour_list_y) # 使用模型進行預測 predict = CNNPredict(resize[y1:y2, x1:x2, :].copy()) # 畫出預測框 cv2.rectangle(resize, (x1, y1), (x2, y2), (0, 255, 0), 1) # 獲得預測結果 i = np.argmax(predict[0]) # 組合顯示文字，包括類別名稱、預測機率與面積 str1 = dirs[i] + " ," + str(predict[0][i]) + " , " + str(cv2.contourArea(contour1)) # 顯示預測結果文字 cv2.putText(resize, str(str1), (x, y), 
cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0)) # 畫出所有符合條件的輪廓 cv2.drawContours(resize, contour_list, -1, (255, 0, 0), 2) # 顯示處理後的影像 cv2.imshow('Objects Detected', resize) # 偵測按鍵，如果按下 'q' 或 'Esc' 鍵，則退出循環 key = cv2.waitKey(10) if key & 0xFF == ord('q') or key == 27: break # 釋放攝影機並關閉所有視窗 cap.release() cv2.destroyAllWindows() ``` ### YOLO > 先介紹opencv 的 haar cascades > 進行局部人臉辨識 ![image](https://hackmd.io/_uploads/ry4yOX7jA.png) ```python= #v1 import numpy as np import cv2 face_cascade = cv2.CascadeClassifier('C:\\03-haar\\haarcascades/haarcascade_frontalface_default.xml') #讀取權重檔 url="http://172.19.106.80:4747/video" cap = cv2.VideoCapture(url) while(True): k=cv2.waitKey(1) if k==27 or k == ord('q'): # 離開迴圈 break ret, img = cap.read() if img is not None: img = cv2.resize(img, (640,480), interpolation = cv2.INTER_AREA)# 調整圖片大小 gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # 彩色轉灰階 faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=3, minSize=(100, 100), flags=cv2.CASCADE_SCALE_IMAGE) # 找物件 for (x, y, w, h) in faces: # 位置 img = cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2) # 圈出臉的位置 cv2.imshow('img',img) cap.release() cv2.destroyAllWindows() ``` ```python= #v2 import numpy as np import cv2 import datetime import os import time # 讀取權重檔 cascade_path = 'C:\\opencv/haarcascade_frontalface_default.xml' face_cascade = cv2.CascadeClassifier(cascade_path) # 照片存檔路徑與權重檔案相同 save_directory = os.path.dirname(cascade_path) # 開啟攝影機 cap = cv2.VideoCapture(0) # 是否開始連續拍照的旗標 save_face = False while(True): k = cv2.waitKey(1) if k == 27 or k == ord('q'): # 按下ESC或q鍵離開迴圈 break elif k == ord('s'): # 按下s鍵後開始連續拍照 save_face = True ret, img = cap.read() if img is not None: img = cv2.resize(img, (640, 480), interpolation=cv2.INTER_AREA) # 調整圖片大小 gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # 彩色轉灰階 faces = face_cascade.detectMultiScale( gray, scaleFactor=1.05, # 調整縮放係數，更細緻地檢測較小臉部 minNeighbors=3, minSize=(50, 50), # 調整最小臉部大小以便檢測嬰兒臉部 flags=cv2.CASCADE_SCALE_IMAGE ) # 偵測臉部 for 
(x, y, w, h) in faces: # 取得臉部位置 img = cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2) # 圈出臉的位置 if save_face: # 取得臉部區域 face_img = img[y:y+h, x:x+w] # 取得當前日期和時間，包括毫秒 timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S_%f") # 設定檔名並存檔到權重檔案相同路徑 filename = os.path.join(save_directory, f"face_{timestamp}.png") cv2.imwrite(filename, face_img) print(f"圖片已存檔為: {filename}") # 每秒拍攝一張 time.sleep(1) # 顯示圖片 cv2.imshow('img', img) # 釋放攝影機資源並關閉視窗 cap.release() cv2.destroyAllWindows() ``` ### 專題做法 1. 先拍照透過haar抓臉， 2. 丟CNN訓練 3. 得到權重檔後， 4. 再透過haar 拍照抓臉，丟比對 > 用google colab執行人家的權重與設定檔 ### VOC 一個XML + JPG檔案 ![image](https://hackmd.io/_uploads/HyT9VMVoC.png) 因為在做標籤時使用VOC，所以要將VOC型態轉yolo需要的型態 ![image](https://hackmd.io/_uploads/H1pfqGEj0.png) 如果檔案有問題可以刪除或是修檔案 ![image](https://hackmd.io/_uploads/SkjbpzNjC.png) 如果準度不理想可以調整這裡(訓練次數) ![image](https://hackmd.io/_uploads/BkH4kQ4oC.png) 訓練完後最值錢的權重檔案 ![image](https://hackmd.io/_uploads/HyLcemVi0.png) ![image](https://hackmd.io/_uploads/B1b_DQNjR.png)