## RGB
```python=
import cv2
import numpy as np

cap = cv2.VideoCapture(0)

# 定義滑鼠點擊事件
def mouse_callback(event, x, y, flags, param):
    if event == cv2.EVENT_LBUTTONDOWN:
        # 輸出滑鼠點擊位置的像素座標
        print("滑鼠點擊事件座標 (x, y):", x, y)
        # 獲取指定位置的顏色RGB值
        b, g, r = frame[y, x]
        print("像素值 (BGR):", b, g, r)

# 創建視窗綁定滑鼠點擊事件範圍
cv2.namedWindow('frame', 0)

while True:
    ret, frame = cap.read()
    # 滑鼠點擊事件
    cv2.setMouseCallback('frame', mouse_callback)
    # 顯示影像
    cv2.imshow('frame', frame)
    # 等待滑鼠點擊事件
    key = cv2.waitKey(1) & 0xff
    if key == ord('q'):
        break

# 關閉視窗以及攝影機節省運作資源
cap.release()
cv2.destroyAllWindows()
```
## 再來就是用RealSense點擊螢幕獲取資訊了
> 學會取得影像座標
> 你就可以試著將RealSense融合進來
> 取得某個點的影像座標以及深度
> 不囉唆直接上程式碼
> [color=#DA0000]

```python=
import pyrealsense2 as rs
import numpy as np
import cv2

pipeline = rs.pipeline()
config = rs.config()
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 15)
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 15)
pipe_profile = pipeline.start(config)

align_to = rs.stream.color
align = rs.align(align_to)

cv2.namedWindow('RealSense', 0)

def get_aligned_images():
    frames = pipeline.wait_for_frames()
    aligned_frames = align.process(frames)
    aligned_depth_frame = aligned_frames.get_depth_frame()
    aligned_color_frame = aligned_frames.get_color_frame()
    depth_intrin = aligned_depth_frame.profile.as_video_stream_profile().intrinsics
    color_intrin = aligned_color_frame.profile.as_video_stream_profile().intrinsics
    img_color = np.asanyarray(aligned_color_frame.get_data())
    img_depth = np.asanyarray(aligned_depth_frame.get_data())
    return color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame

def get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin):
    x = depth_pixel[0]
    y = depth_pixel[1]
    dis = aligned_depth_frame.get_distance(x, y)
    camera_coordinate = rs.rs2_deproject_pixel_to_point(depth_intrin, depth_pixel, dis)
    return dis, camera_coordinate

def mouse_callback(event, x, y, flags, param):
    if event == cv2.EVENT_LBUTTONDOWN:
        # 輸出滑鼠點擊位置的像素座標
        print("滑鼠點擊事件座標 (x, y):", x, y)
        depth_pixel = [x, y]
        dis, camera_coordinate = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin)
        print('深度: ', dis)  # 深度單位是m
        #print('camera_coordinate: ', camera_coordinate)

if __name__ == "__main__":
    while True:
        color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame = get_aligned_images()
        cv2.setMouseCallback('RealSense', mouse_callback)
        cv2.imshow('RealSense', img_color)
        key = cv2.waitKey(1) & 0xff
        if key == ord('q'):
            break
    pipeline.stop()
    cv2.destroyAllWindows()
```
## ==yolo配合Realsense運用滑鼠點擊返回深度值==
```python=
#匯入必要的函式庫 import pyrealsense2 as rs import numpy as np import cv2 import time import tensorflow as tf from PIL import Image from yolo import YOLO, YOLO_ONNX #獲取系統上所有GPU裝置 gpus = tf.config.experimental.list_physical_devices(device_type='GPU') for gpu in gpus: #為每個GPU啟用記憶體增長 #可以避免Tensorflow一開始就使用所有的GPU記憶體 #而是按照需增加GPU記憶體使用量 tf.config.experimental.set_memory_growth(gpu, True) #創建並啟動realsense pipeline pipeline = rs.pipeline() config = rs.config() #運用深度及彩色串流 config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 15) config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 15) #啟動pipeline pipe_profile = pipeline.start(config) #指定對齊目標為彩色串流 align_to = rs.stream.color #將深度串流對齊至彩色串流 align = rs.align(align_to) #創建一個視窗名為RealSense cv2.namedWindow('RealSence',0) def get_aligned_images(): frames = pipeline.wait_for_frames()#從realsense pipeline獲取一組新的畫面 aligned_frames = 
align.process(frames)#將深度影像對齊至彩色影像,並返回一組新的已對齊畫面 aligned_depth_frame = aligned_frames.get_depth_frame()#從已對齊畫面取得深度畫面 aligned_color_frame = aligned_frames.get_color_frame()#從已對齊畫面取得彩色畫面 depth_intrin = aligned_depth_frame.profile.as_video_stream_profile().intrinsics#獲取深度畫面的內部參數,如:主座標 color_intrin = aligned_color_frame.profile.as_video_stream_profile().intrinsics#獲取彩色畫面的內部參數 img_color = np.asanyarray(aligned_color_frame.get_data())#將彩色畫面轉換Numpy陣列 img_depth = np.asanyarray(aligned_depth_frame.get_data())#將深度畫面轉換為Numpy陣列 return color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame#返回內部參數,彩色圖像,深度圖像及深度畫面 def get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin): #將輸入的深度影像座標分解為x,y部份 x = depth_pixel[0] y = depth_pixel[1] #運用rs SDK get_distance方式取得指定像素深度值 dis = aligned_depth_frame.get_distance(x, y) camera_coordinate = rs.rs2_deproject_pixel_to_point(depth_intrin, depth_pixel, dis)#運用rs SDK rs2_dep....函數將2D像素座標轉換到3D相機座標系中的點 return dis, camera_coordinate def mouse_callback(event, x, y, flags, param): if event == cv2.EVENT_LBUTTONDOWN: print("滑鼠點擊事件座標 (x, y):", x, y) depth_pixel = [x, y] dis, camera_coordinate = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) print ('深度: ',dis) #print ('camera_coordinate: ',camera_coordinate) #初始化幀率及YOLO模型 fps = 0.0 yolo = YOLO() #主迴圈 if __name__=="__main__": while True: t1 = time.time()#紀錄當前時間 color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame = get_aligned_images()# 呼叫 get_aligned_images() 函數並將回傳的結果分別賦值給變數 color_intrin、depth_intrin、img_color、img_depth、aligned_depth_frame frame = cv2.cvtColor(img_color,cv2.COLOR_BGR2RGB)#將輸入BGR圖像轉成RGB圖像 frame = Image.fromarray(np.uint8(frame))#將numpy陣列轉成PIL圖像 frame = np.array(yolo.detect_image(frame))#用YOLO模型進行物件檢測並將結果轉換回 NumPy 陣列 frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR)#將RGB圖像轉換回BGR格式 cv2.setMouseCallback('RealSence', mouse_callback) frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)# 在圖像上繪製每秒幀數 # 計算每秒幀數及打印出來 fps = ( fps + (1./(time.time()-t1)) ) / 2 print("fps= %.2f"%(fps)) #顯示圖像及等待並獲取用戶輸入 cv2.imshow('RealSence',frame) key = cv2.waitKey(1)&0xff #如果按下q會跳出主迴圈,按下s則暫停畫面更新 if key == ord('q'): break if key == ord('s'): cv2.waitKey(0) pipeline.stop() cv2.destroyAllWindows() ``` {%youtube h1T7rwze-Pc%} ## ==透過修改def detect_image顯示bounding box中心點深度值== ```python= def detect_image(self, image, depth_image,crop = False, count = False): #---------------------------------------------------------# # 在这里将图像转换成RGB图像,防止灰度图在预测时报错。 # 代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB #---------------------------------------------------------# image = cvtColor(image) #---------------------------------------------------------# # 给图像增加灰条,实现不失真的resize # 也可以直接resize进行识别 #---------------------------------------------------------# image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image) #---------------------------------------------------------# # 添加上batch_size维度,并进行归一化 #---------------------------------------------------------# image_data = np.expand_dims(preprocess_input(np.array(image_data, dtype='float32')), 0) #---------------------------------------------------------# # 将图像输入网络当中进行预测! 
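        # 補充說明：get_pred 回傳的 out_boxes 為各偵測框在「原始影像」上的像素座標，
        # 依下方取值順序為 (top, left, bottom, right)；out_scores 為各框的信心分數，
        # out_classes 為類別索引，可透過 self.class_names 轉回類別名稱。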
#---------------------------------------------------------# input_image_shape = np.expand_dims(np.array([image.size[1], image.size[0]], dtype='float32'), 0) out_boxes, out_scores, out_classes = self.get_pred(image_data, input_image_shape) print('Found {} boxes for {}'.format(len(out_boxes), 'img')) #---------------------------------------------------------# # 设置字体与边框厚度 #---------------------------------------------------------# font = ImageFont.truetype(font='model_data/simhei.ttf', size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32')) thickness = int(max((image.size[0] + image.size[1]) // np.mean(self.input_shape), 1)) #---------------------------------------------------------# # 计数 #---------------------------------------------------------# if count: print("top_label:", out_classes) classes_nums = np.zeros([self.num_classes]) for i in range(self.num_classes): num = np.sum(out_classes == i) if num > 0: print(self.class_names[i], " : ", num) classes_nums[i] = num print("classes_nums:", classes_nums) #---------------------------------------------------------# # 是否进行目标的裁剪 #---------------------------------------------------------# if crop: for i, c in list(enumerate(out_boxes)): top, left, bottom, right = out_boxes[i] top = max(0, np.floor(top).astype('int32')) left = max(0, np.floor(left).astype('int32')) bottom = min(image.size[1], np.floor(bottom).astype('int32')) right = min(image.size[0], np.floor(right).astype('int32')) dir_save_path = "img_crop" if not os.path.exists(dir_save_path): os.makedirs(dir_save_path) crop_image = image.crop([left, top, right, bottom]) crop_image.save(os.path.join(dir_save_path, "crop_" + str(i) + ".png"), quality=95, subsampling=0) print("save crop_" + str(i) + ".png to " + dir_save_path) #---------------------------------------------------------# # 图像绘制 #---------------------------------------------------------# depth_image = np.array(depth_image)#將深度影像轉成numpy數組 for i, c in list(enumerate(out_classes)): predicted_class = self.class_names[int(c)] box = out_boxes[i] score = out_scores[i] top, left, bottom, right = box top = max(0, np.floor(top).astype('int32')) left = max(0, np.floor(left).astype('int32')) bottom = min(image.size[1], np.floor(bottom).astype('int32')) right = min(image.size[0], np.floor(right).astype('int32')) #計算bounding box中心點 center_x = int((left + right) / 2) center_y = int((top + bottom) / 2) #取得邊界框中心點在深度圖像中的深度值 depth_at_center = depth_image[center_y, center_x] #將結果打印出來 print(f'Depth at center of bounding box: {depth_at_center} mm') label = '{} {:.2f}'.format(predicted_class, score) draw = ImageDraw.Draw(image) label_size = draw.textsize(label, font) label = label.encode('utf-8') print(label, top, left, bottom, right) if top - label_size[1] >= 0: text_origin = np.array([left, top - label_size[1]]) else: text_origin = np.array([left, top + 1]) for i in range(thickness): draw.rectangle([left + i, top + i, right - i, bottom - i], outline=self.colors[c]) draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=self.colors[c]) draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font=font) del draw return image ``` ### ==透過修改def detect_image(主迴圈程式加計時)== ```python= #匯入必要的函式庫 import pyrealsense2 as rs import numpy as np import cv2 import time import tensorflow as tf from PIL import Image from yolo import YOLO, YOLO_ONNX #獲取系統上所有GPU裝置 gpus = tf.config.experimental.list_physical_devices(device_type='GPU') for gpu in gpus: #為每個GPU啟用記憶體增長 #可以避免Tensorflow一開始就使用所有的GPU記憶體 #而是按照需增加GPU記憶體使用量 
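    # 補充：set_memory_growth 必須在任何 GPU 被初始化(開始配置記憶體)之前呼叫，
    # 否則 TensorFlow 會拋出 RuntimeError，因此這段要放在建立 YOLO 模型之前。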
tf.config.experimental.set_memory_growth(gpu, True) #創建並啟動realsense pipeline pipeline = rs.pipeline() config = rs.config() #運用深度及彩色串流 config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 15) config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 15) #啟動pipline pipe_profile = pipeline.start(config) #指定對齊目標為彩色串流 align_to = rs.stream.color #將深度串流對齊至彩色串流 align = rs.align(align_to) #創見一個視窗名為RealSense cv2.namedWindow('RealSence',0) def get_aligned_images(): frames = pipeline.wait_for_frames()#從realsense piplin獲取一組新的畫面 aligned_frames = align.process(frames)#將深度影像對齊至彩色影像,並返回一組新的已對齊畫面 aligned_depth_frame = aligned_frames.get_depth_frame()#從已對齊畫面取得深度畫面 aligned_color_frame = aligned_frames.get_color_frame()#從已對齊畫面取得彩色畫面 depth_intrin = aligned_depth_frame.profile.as_video_stream_profile().intrinsics#獲取深度畫面的內部參數,如:主座標 color_intrin = aligned_color_frame.profile.as_video_stream_profile().intrinsics#獲取彩色畫面的內部參數 img_color = np.asanyarray(aligned_color_frame.get_data())#將彩色畫面轉換Numpy陣列 img_depth = np.asanyarray(aligned_depth_frame.get_data())#將深度畫面轉換為Numpy陣列 return color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame#返回內部參數,彩色圖像,深度圖像及深度畫面 def get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin): #將輸入的深度影像座標分解為x,y部份 x = depth_pixel[0] y = depth_pixel[1] #運用rs SDK get_distance方式取得指定像素深度值 dis = aligned_depth_frame.get_distance(x, y) camera_coordinate = rs.rs2_deproject_pixel_to_point(depth_intrin, depth_pixel, dis)#運用rs SDK rs2_dep....函數將2D像素座標轉換到3D相機座標系中的點 return dis, camera_coordinate def mouse_callback(event, x, y, flags, param): if event == cv2.EVENT_LBUTTONDOWN: print("滑鼠點擊事件座標 (x, y):", x, y) depth_pixel = [x, y] dis, camera_coordinate = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) print ('深度: ',dis) #print ('camera_coordinate: ',camera_coordinate) #初始化幀率及YOLO模型 fps = 0.0 yolo = YOLO() #主迴圈 if __name__=="__main__": while True: #計時開始 start_time = time.time() t1 = time.time()#紀錄當前時間 color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame = get_aligned_images()# 呼叫 get_aligned_images() 函數並將回傳的結果分別賦值給變數 color_intrin、depth_intrin、img_color、img_depth、aligned_depth_frame frame = cv2.cvtColor(img_color,cv2.COLOR_BGR2RGB)#將輸入BGR圖像轉成RGB圖像 frame = Image.fromarray(np.uint8(frame))#將numpy陣列轉成PIL圖像 #用YOLO模型進行物件檢測並將結果轉換回 NumPy 陣列 # 增加了 img_depth 参数 frame = np.array(yolo.detect_image(frame, img_depth)) frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR)#將RGB圖像轉換回BGR格式 cv2.setMouseCallback('RealSence', mouse_callback) frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)# 在圖像上繪製每秒幀數 # 計算每秒幀數及打印出來 fps = ( fps + (1./(time.time()-t1)) ) / 2 print("fps= %.2f"%(fps)) #顯示圖像及等待並獲取用戶輸入 cv2.imshow('RealSence',frame) key = cv2.waitKey(1)&0xff #計時停止 elapsed_time = time.time() - start_time print("Time: %.2f seconds" % elapsed_time) #如果按下q會跳出主迴圈,按下s則暫停畫面更新 if key == ord('q'): break if key == ord('s'): cv2.waitKey(0) pipeline.stop() cv2.destroyAllWindows() ``` {%youtube qe2cwuZ9Obc%} ## ==透過從def detect_image吐出四個值再從主程式修改== ```python= def detect_image(self, image, crop = False, count = False): #---------------------------------------------------------# # 在这里将图像转换成RGB图像,防止灰度图在预测时报错。 # 代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB #---------------------------------------------------------# image = cvtColor(image) #---------------------------------------------------------# # 给图像增加灰条,实现不失真的resize # 也可以直接resize进行识别 #---------------------------------------------------------# image_data = resize_image(image, 
(self.input_shape[1], self.input_shape[0]), self.letterbox_image) #---------------------------------------------------------# # 添加上batch_size维度,并进行归一化 #---------------------------------------------------------# image_data = np.expand_dims(preprocess_input(np.array(image_data, dtype='float32')), 0) #---------------------------------------------------------# # 将图像输入网络当中进行预测! #---------------------------------------------------------# input_image_shape = np.expand_dims(np.array([image.size[1], image.size[0]], dtype='float32'), 0) out_boxes, out_scores, out_classes = self.get_pred(image_data, input_image_shape) print('Found {} boxes for {}'.format(len(out_boxes), 'img')) #---------------------------------------------------------# # 设置字体与边框厚度 #---------------------------------------------------------# font = ImageFont.truetype(font='model_data/simhei.ttf', size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32')) thickness = int(max((image.size[0] + image.size[1]) // np.mean(self.input_shape), 1)) #---------------------------------------------------------# # 计数 #---------------------------------------------------------# if count: print("top_label:", out_classes) classes_nums = np.zeros([self.num_classes]) for i in range(self.num_classes): num = np.sum(out_classes == i) if num > 0: print(self.class_names[i], " : ", num) classes_nums[i] = num print("classes_nums:", classes_nums) #---------------------------------------------------------# # 是否进行目标的裁剪 #---------------------------------------------------------# if crop: for i, c in list(enumerate(out_boxes)): top, left, bottom, right = out_boxes[i] top = max(0, np.floor(top).astype('int32')) left = max(0, np.floor(left).astype('int32')) bottom = min(image.size[1], np.floor(bottom).astype('int32')) right = min(image.size[0], np.floor(right).astype('int32')) dir_save_path = "img_crop" if not os.path.exists(dir_save_path): os.makedirs(dir_save_path) crop_image = image.crop([left, top, right, bottom]) crop_image.save(os.path.join(dir_save_path, "crop_" + str(i) + ".png"), quality=95, subsampling=0) print("save crop_" + str(i) + ".png to " + dir_save_path) #---------------------------------------------------------# # 图像绘制 #---------------------------------------------------------# for i, c in list(enumerate(out_classes)): predicted_class = self.class_names[int(c)] box = out_boxes[i] score = out_scores[i] top, left, bottom, right = box top = max(0, np.floor(top).astype('int32')) left = max(0, np.floor(left).astype('int32')) bottom = min(image.size[1], np.floor(bottom).astype('int32')) right = min(image.size[0], np.floor(right).astype('int32')) label = '{} {:.2f}'.format(predicted_class, score) draw = ImageDraw.Draw(image) label_size = draw.textsize(label, font) label = label.encode('utf-8') print(label, top, left, bottom, right) if top - label_size[1] >= 0: text_origin = np.array([left, top - label_size[1]]) else: text_origin = np.array([left, top + 1]) for i in range(thickness): draw.rectangle([left + i, top + i, right - i, bottom - i], outline=self.colors[c]) draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=self.colors[c]) draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font=font) del draw return image, out_boxes ``` ### ==predict程式內容== ```python= #匯入必要的函式庫 import pyrealsense2 as rs import numpy as np import cv2 import time import tensorflow as tf from PIL import Image from yolo import YOLO, YOLO_ONNX #獲取系統上所有GPU裝置 gpus = tf.config.experimental.list_physical_devices(device_type='GPU') for gpu in gpus: #為每個GPU啟用記憶體增長 
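    # 補充：若機器上沒有可用 GPU，list_physical_devices 會回傳空列表，
    # 此迴圈不會執行，程式會改以 CPU 運行(速度較慢但流程相同)。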
#可以避免Tensorflow一開始就使用所有的GPU記憶體 #而是按照需增加GPU記憶體使用量 tf.config.experimental.set_memory_growth(gpu, True) #創建並啟動realsense pipeline pipeline = rs.pipeline() config = rs.config() #運用深度及彩色串流 config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 15) config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 15) #啟動pipline pipe_profile = pipeline.start(config) #指定對齊目標為彩色串流 align_to = rs.stream.color #將深度串流對齊至彩色串流 align = rs.align(align_to) #創見一個視窗名為RealSense cv2.namedWindow('RealSence',0) def get_aligned_images(): frames = pipeline.wait_for_frames()#從realsense piplin獲取一組新的畫面 aligned_frames = align.process(frames)#將深度影像對齊至彩色影像,並返回一組新的已對齊畫面 aligned_depth_frame = aligned_frames.get_depth_frame()#從已對齊畫面取得深度畫面 aligned_color_frame = aligned_frames.get_color_frame()#從已對齊畫面取得彩色畫面 depth_intrin = aligned_depth_frame.profile.as_video_stream_profile().intrinsics#獲取深度畫面的內部參數,如:主座標 color_intrin = aligned_color_frame.profile.as_video_stream_profile().intrinsics#獲取彩色畫面的內部參數 img_color = np.asanyarray(aligned_color_frame.get_data())#將彩色畫面轉換Numpy陣列 img_depth = np.asanyarray(aligned_depth_frame.get_data())#將深度畫面轉換為Numpy陣列 return color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame#返回內部參數,彩色圖像,深度圖像及深度畫面 def get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin): #將輸入的深度影像座標分解為x,y部份 x = depth_pixel[0] y = depth_pixel[1] #運用rs SDK get_distance方式取得指定像素深度值 dis = aligned_depth_frame.get_distance(x, y) camera_coordinate = rs.rs2_deproject_pixel_to_point(depth_intrin, depth_pixel, dis)#運用rs SDK rs2_dep....函數將2D像素座標轉換到3D相機座標系中的點 return dis, camera_coordinate def mouse_callback(event, x, y, flags, param): if event == cv2.EVENT_LBUTTONDOWN: print("滑鼠點擊事件座標 (x, y):", x, y) depth_pixel = [x, y] dis, camera_coordinate = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) print ('深度: ',dis) #print ('camera_coordinate: ',camera_coordinate) #初始化幀率及YOLO模型 fps = 0.0 yolo = YOLO() #主迴圈 if __name__=="__main__": while True: start_time = time.time() t1 = time.time()#紀錄當前時間 color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame = get_aligned_images()# 呼叫 get_aligned_images() 函數並將回傳的結果分別賦值給變數 color_intrin、depth_intrin、img_color、img_depth、aligned_depth_frame frame = cv2.cvtColor(img_color,cv2.COLOR_BGR2RGB)#將輸入BGR圖像轉成RGB圖像 frame = Image.fromarray(np.uint8(frame))#將numpy陣列轉成PIL圖像 image, out_boxes = yolo.detect_image(frame) frame = np.array(image)#用YOLO模型進行物件檢測並將結果轉換回 NumPy 陣列 frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR)#將RGB圖像轉換回BGR格式 for top, left, bottom, right in out_boxes: # 這裡假設每個 box 的格式是 (top, left, bottom, right) center_x = int((left + right) / 2) center_y = int((top + bottom) / 2) depth_pixel = [center_x, center_y] dis, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) print(f"Depth at center of bounding box: {dis}") cv2.setMouseCallback('RealSence', mouse_callback) frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)# 在圖像上繪製每秒幀數 # 計算每秒幀數及打印出來 fps = ( fps + (1./(time.time()-t1)) ) / 2 print("fps= %.2f"%(fps)) #顯示圖像及等待並獲取用戶輸入 cv2.imshow('RealSence',frame) key = cv2.waitKey(1)&0xff elapsed_time = time.time() - start_time print("Time: %.2f seconds" % elapsed_time) #如果按下q會跳出主迴圈,按下s則暫停畫面更新 if key == ord('q'): break if key == ord('s'): cv2.waitKey(0) pipeline.stop() cv2.destroyAllWindows() ``` ## 運用抓取中心點深度值程式增加bounding box內取出最大值最小值(predict) ```python= #匯入必要的函式庫 import pyrealsense2 as rs import numpy as np import cv2 import time import tensorflow as tf from PIL import Image from yolo 
import YOLO, YOLO_ONNX #獲取系統上所有GPU裝置 gpus = tf.config.experimental.list_physical_devices(device_type='GPU') for gpu in gpus: #為每個GPU啟用記憶體增長 #可以避免Tensorflow一開始就使用所有的GPU記憶體 #而是按照需增加GPU記憶體使用量 tf.config.experimental.set_memory_growth(gpu, True) #創建並啟動realsense pipeline pipeline = rs.pipeline() config = rs.config() #運用深度及彩色串流 config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 15) config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 15) #啟動pipline pipe_profile = pipeline.start(config) #指定對齊目標為彩色串流 align_to = rs.stream.color #將深度串流對齊至彩色串流 align = rs.align(align_to) #創見一個視窗名為RealSense cv2.namedWindow('RealSence',0) def get_aligned_images(): frames = pipeline.wait_for_frames()#從realsense piplin獲取一組新的畫面 aligned_frames = align.process(frames)#將深度影像對齊至彩色影像,並返回一組新的已對齊畫面 aligned_depth_frame = aligned_frames.get_depth_frame()#從已對齊畫面取得深度畫面 aligned_color_frame = aligned_frames.get_color_frame()#從已對齊畫面取得彩色畫面 depth_intrin = aligned_depth_frame.profile.as_video_stream_profile().intrinsics#獲取深度畫面的內部參數,如:主座標 color_intrin = aligned_color_frame.profile.as_video_stream_profile().intrinsics#獲取彩色畫面的內部參數 img_color = np.asanyarray(aligned_color_frame.get_data())#將彩色畫面轉換Numpy陣列 img_depth = np.asanyarray(aligned_depth_frame.get_data())#將深度畫面轉換為Numpy陣列 return color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame#返回內部參數,彩色圖像,深度圖像及深度畫面 def get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin): #將輸入的深度影像座標分解為x,y部份 x = depth_pixel[0] y = depth_pixel[1] #運用rs SDK get_distance方式取得指定像素深度值 dis = aligned_depth_frame.get_distance(x, y) camera_coordinate = rs.rs2_deproject_pixel_to_point(depth_intrin, depth_pixel, dis)#運用rs SDK rs2_dep....函數將2D像素座標轉換到3D相機座標系中的點 return dis, camera_coordinate def mouse_callback(event, x, y, flags, param): if event == cv2.EVENT_LBUTTONDOWN: print("滑鼠點擊事件座標 (x, y):", x, y) depth_pixel = [x, y] dis, camera_coordinate = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) print ('深度: ',dis) #print ('camera_coordinate: ',camera_coordinate) #初始化幀率及YOLO模型 fps = 0.0 yolo = YOLO() #主迴圈 if __name__=="__main__": # 確定此程式作為主程式執行 while True: # 持續執行直到使用者退出 start_time = time.time() # 紀錄程式開始的時間 t1 = time.time() # 用來計算 FPS 的時間標記 color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame = get_aligned_images() # 獲取對齊的影像和內部參數 frame = cv2.cvtColor(img_color,cv2.COLOR_BGR2RGB) # 將 BGR 圖像轉換成 RGB frame = Image.fromarray(np.uint8(frame)) # 將 numpy 陣列轉換成 Image 物件 image, out_boxes = yolo.detect_image(frame) # 使用 YOLO 偵測物件,回傳影像和邊界框 frame = np.array(image) # 將 Image 物件轉換回 numpy 陣列 frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR) # 將 RGB 圖像轉換回 BGR # 遍歷所有偵測到的邊界框 for top, left, bottom, right in out_boxes: min_depth = float('inf') # 初始化最小深度為無窮大 max_depth = float('-inf') # 初始化最大深度為無窮小 min_depth_pixel = None # 初始化最小深度的像素點為 None max_depth_pixel = None # 初始化最大深度的像素點為 None # 遍歷邊界框內的所有像素 for y in range(int(top), int(bottom)): for x in range(int(left), int(right)): depth_pixel = [x, y] # 建立像素座標 dis, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) # 獲取像素點的深度 # 更新最小和最大深度 if dis < min_depth: min_depth = dis min_depth_pixel = depth_pixel if dis > max_depth: max_depth = dis max_depth_pixel = depth_pixel # 計算邊界框的中心點座標 center_x = int((left + right) / 2) center_y = int((top + bottom) / 2) depth_pixel = [center_x, center_y] center_depth, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) # 獲取中心點的深度 # 印出最小、最大和中心深度的資訊 print(f"Minimum depth at: {min_depth_pixel}, value: {min_depth}") print(f"Maximum depth at: 
{max_depth_pixel}, value: {max_depth}") print(f"Center depth at: {depth_pixel}, value: {center_depth}") cv2.setMouseCallback('RealSence', mouse_callback) # 設定滑鼠回調函數,用來處理滑鼠事件 # 在畫面上顯示 FPS frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) # 計算並更新 FPS fps = ( fps + (1./(time.time()-t1)) ) / 2 print("fps= %.2f"%(fps)) # 印出 FPS cv2.imshow('RealSence',frame) # 顯示處理後的畫面 key = cv2.waitKey(1)&0xff # 讀取使用者按下的按鍵 # 計算並印出執行時間 elapsed_time = time.time() - start_time print("Time: %.2f seconds" % elapsed_time) # 如果使用者按下 'q',則跳出迴圈 if key == ord('q'): break # 如果使用者按下 's',則等待使用者再次按下按鍵 if key == ord('s'): cv2.waitKey(0) pipeline.stop() cv2.destroyAllWindows() ``` ### def detectimg 程式 ``` python= def detect_image(self, image, crop = False, count = False): #---------------------------------------------------------# # 在这里将图像转换成RGB图像,防止灰度图在预测时报错。 # 代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB #---------------------------------------------------------# image = cvtColor(image) #---------------------------------------------------------# # 给图像增加灰条,实现不失真的resize # 也可以直接resize进行识别 #---------------------------------------------------------# image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image) #---------------------------------------------------------# # 添加上batch_size维度,并进行归一化 #---------------------------------------------------------# image_data = np.expand_dims(preprocess_input(np.array(image_data, dtype='float32')), 0) #---------------------------------------------------------# # 将图像输入网络当中进行预测! #---------------------------------------------------------# input_image_shape = np.expand_dims(np.array([image.size[1], image.size[0]], dtype='float32'), 0) out_boxes, out_scores, out_classes = self.get_pred(image_data, input_image_shape) print('Found {} boxes for {}'.format(len(out_boxes), 'img')) #---------------------------------------------------------# # 设置字体与边框厚度 #---------------------------------------------------------# font = ImageFont.truetype(font='model_data/simhei.ttf', size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32')) thickness = int(max((image.size[0] + image.size[1]) // np.mean(self.input_shape), 1)) #---------------------------------------------------------# # 计数 #---------------------------------------------------------# if count: print("top_label:", out_classes) classes_nums = np.zeros([self.num_classes]) for i in range(self.num_classes): num = np.sum(out_classes == i) if num > 0: print(self.class_names[i], " : ", num) classes_nums[i] = num print("classes_nums:", classes_nums) #---------------------------------------------------------# # 是否进行目标的裁剪 #---------------------------------------------------------# if crop: for i, c in list(enumerate(out_boxes)): top, left, bottom, right = out_boxes[i] top = max(0, np.floor(top).astype('int32')) left = max(0, np.floor(left).astype('int32')) bottom = min(image.size[1], np.floor(bottom).astype('int32')) right = min(image.size[0], np.floor(right).astype('int32')) dir_save_path = "img_crop" if not os.path.exists(dir_save_path): os.makedirs(dir_save_path) crop_image = image.crop([left, top, right, bottom]) crop_image.save(os.path.join(dir_save_path, "crop_" + str(i) + ".png"), quality=95, subsampling=0) print("save crop_" + str(i) + ".png to " + dir_save_path) #---------------------------------------------------------# # 图像绘制 #---------------------------------------------------------# for i, c in list(enumerate(out_classes)): predicted_class = self.class_names[int(c)] box = 
out_boxes[i] score = out_scores[i] top, left, bottom, right = box top = max(0, np.floor(top).astype('int32')) left = max(0, np.floor(left).astype('int32')) bottom = min(image.size[1], np.floor(bottom).astype('int32')) right = min(image.size[0], np.floor(right).astype('int32')) label = '{} {:.2f}'.format(predicted_class, score) draw = ImageDraw.Draw(image) label_size = draw.textsize(label, font) label = label.encode('utf-8') print(label, top, left, bottom, right) if top - label_size[1] >= 0: text_origin = np.array([left, top - label_size[1]]) else: text_origin = np.array([left, top + 1]) for i in range(thickness): draw.rectangle([left + i, top + i, right - i, bottom - i], outline=self.colors[c]) draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=self.colors[c]) draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font=font) del draw return image, out_boxes ``` ## 兩個bounding box相對位置關係(最近最遠兩個點的(x, y)座標) ``` python= #匯入必要的函式庫 import pyrealsense2 as rs import numpy as np import cv2 import time import tensorflow as tf from PIL import Image from yolo import YOLO, YOLO_ONNX #獲取系統上所有GPU裝置 gpus = tf.config.experimental.list_physical_devices(device_type='GPU') for gpu in gpus: #為每個GPU啟用記憶體增長 #可以避免Tensorflow一開始就使用所有的GPU記憶體 #而是按照需增加GPU記憶體使用量 tf.config.experimental.set_memory_growth(gpu, True) #創建並啟動realsense pipeline pipeline = rs.pipeline() config = rs.config() #運用深度及彩色串流 config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 15) config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 15) #啟動pipline pipe_profile = pipeline.start(config) #指定對齊目標為彩色串流 align_to = rs.stream.color #將深度串流對齊至彩色串流 align = rs.align(align_to) #創見一個視窗名為RealSense cv2.namedWindow('RealSence',0) def get_aligned_images(): frames = pipeline.wait_for_frames()#從realsense piplin獲取一組新的畫面 aligned_frames = align.process(frames)#將深度影像對齊至彩色影像,並返回一組新的已對齊畫面 aligned_depth_frame = aligned_frames.get_depth_frame()#從已對齊畫面取得深度畫面 aligned_color_frame = aligned_frames.get_color_frame()#從已對齊畫面取得彩色畫面 depth_intrin = aligned_depth_frame.profile.as_video_stream_profile().intrinsics#獲取深度畫面的內部參數,如:主座標 color_intrin = aligned_color_frame.profile.as_video_stream_profile().intrinsics#獲取彩色畫面的內部參數 img_color = np.asanyarray(aligned_color_frame.get_data())#將彩色畫面轉換Numpy陣列 img_depth = np.asanyarray(aligned_depth_frame.get_data())#將深度畫面轉換為Numpy陣列 return color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame#返回內部參數,彩色圖像,深度圖像及深度畫面 def get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin): #將輸入的深度影像座標分解為x,y部份 x = depth_pixel[0] y = depth_pixel[1] #運用rs SDK get_distance方式取得指定像素深度值 dis = aligned_depth_frame.get_distance(x, y) camera_coordinate = rs.rs2_deproject_pixel_to_point(depth_intrin, depth_pixel, dis)#運用rs SDK rs2_dep....函數將2D像素座標轉換到3D相機座標系中的點 return dis, camera_coordinate def mouse_callback(event, x, y, flags, param): if event == cv2.EVENT_LBUTTONDOWN: print("滑鼠點擊事件座標 (x, y):", x, y) depth_pixel = [x, y] dis, camera_coordinate = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) print ('深度: ',dis) #print ('camera_coordinate: ',camera_coordinate) #初始化幀率及YOLO模型 fps = 0.0 yolo = YOLO() #主迴圈 if __name__=="__main__": # 確定此程式作為主程式執行 while True: # 持續執行直到使用者退出 start_time = time.time() # 紀錄程式開始的時間 t1 = time.time() # 用來計算 FPS 的時間標記 color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame = get_aligned_images() # 獲取對齊的影像和內部參數 frame = cv2.cvtColor(img_color,cv2.COLOR_BGR2RGB) # 將 BGR 圖像轉換成 RGB frame = Image.fromarray(np.uint8(frame)) # 將 numpy 陣列轉換成 Image 物件 image, 
out_boxes = yolo.detect_image(frame) # 使用 YOLO 偵測物件,回傳影像和邊界框 frame = np.array(image) # 將 Image 物件轉換回 numpy 陣列 frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR) # 將 RGB 圖像轉換回 BGR # 遍歷所有偵測到的邊界框 center_points = [] for top, left, bottom, right in out_boxes: min_depth = float('inf') # 初始化最小深度為無窮大 max_depth = float('-inf') # 初始化最大深度為無窮小 min_depth_pixel = None # 初始化最小深度的像素點為 None max_depth_pixel = None # 初始化最大深度的像素點為 None # 遍歷邊界框內的所有像素 for y in range(int(top), int(bottom)): for x in range(int(left), int(right)): depth_pixel = [x, y] # 建立像素座標 dis, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) # 獲取像素點的深度 # 更新最小和最大深度 if dis > 0: if dis < min_depth: min_depth = dis min_depth_pixel = depth_pixel if dis > max_depth: max_depth = dis max_depth_pixel = depth_pixel # 計算邊界框的中心點座標 center_x = int((left + right) / 2) center_y = int((top + bottom) / 2) depth_pixel = [center_x, center_y] center_depth, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) # 獲取中心點的深度 # 印出最小、最大和中心深度的資訊 print(f"Minimum depth at: {min_depth_pixel}, value: {min_depth}") print(f"Maximum depth at: {max_depth_pixel}, value: {max_depth}") print(f"Center depth at: {depth_pixel}, value: {center_depth}") center_points.append((center_x, center_y, center_depth)) #檢查是否有至少兩個偵測到的邊界框,並計算兩個中心點之間的距離 if len(out_boxes) >= 2: center_x1, center_y1, center_depth1 = center_points[0] center_x2, center_y2, center_depth2 = center_points[1] # 計算兩個中心點之間的距離 distance_x = center_x2 - center_x1 distance_y = center_y2 - center_y1 print(f"Center points distance (x, y): ({distance_x}, {distance_y})") cv2.setMouseCallback('RealSence', mouse_callback) # 設定滑鼠回調函數,用來處理滑鼠事件 # 在畫面上顯示 FPS frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) # 計算並更新 FPS fps = ( fps + (1./(time.time()-t1)) ) / 2 print("fps= %.2f"%(fps)) # 印出 FPS cv2.imshow('RealSence',frame) # 顯示處理後的畫面 key = cv2.waitKey(1)&0xff # 讀取使用者按下的按鍵 # 計算並印出執行時間 elapsed_time = time.time() - start_time print("Time: %.2f seconds" % elapsed_time) # 如果使用者按下 'q',則跳出迴圈 if key == ord('q'): break # 如果使用者按下 's',則等待使用者再次按下按鍵 if key == ord('s'): cv2.waitKey(0) pipeline.stop() cv2.destroyAllWindows() ``` ![](https://hackmd.io/_uploads/H14VbdQjh.png) ![](https://hackmd.io/_uploads/BJatZ_Xj3.png) ## 增加顯示中心點宇最大最小值點 ```python= #匯入必要的函式庫 import pyrealsense2 as rs import numpy as np import cv2 import time import tensorflow as tf from PIL import Image from yolo import YOLO, YOLO_ONNX #獲取系統上所有GPU裝置 gpus = tf.config.experimental.list_physical_devices(device_type='GPU') for gpu in gpus: #為每個GPU啟用記憶體增長 #可以避免Tensorflow一開始就使用所有的GPU記憶體 #而是按照需增加GPU記憶體使用量 tf.config.experimental.set_memory_growth(gpu, True) GREEN_COLOR = (0, 255, 0) RED_COLOR = (0, 0, 255) YELLOW_COLOR = (0, 255, 255) #創建並啟動realsense pipeline pipeline = rs.pipeline() config = rs.config() #運用深度及彩色串流 config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 15) config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 15) #啟動pipline pipe_profile = pipeline.start(config) #指定對齊目標為彩色串流 align_to = rs.stream.color #將深度串流對齊至彩色串流 align = rs.align(align_to) #創見一個視窗名為RealSense cv2.namedWindow('RealSence',0) def get_aligned_images(): frames = pipeline.wait_for_frames()#從realsense piplin獲取一組新的畫面 aligned_frames = align.process(frames)#將深度影像對齊至彩色影像,並返回一組新的已對齊畫面 aligned_depth_frame = aligned_frames.get_depth_frame()#從已對齊畫面取得深度畫面 aligned_color_frame = aligned_frames.get_color_frame()#從已對齊畫面取得彩色畫面 depth_intrin = 
aligned_depth_frame.profile.as_video_stream_profile().intrinsics#獲取深度畫面的內部參數,如:主座標 color_intrin = aligned_color_frame.profile.as_video_stream_profile().intrinsics#獲取彩色畫面的內部參數 img_color = np.asanyarray(aligned_color_frame.get_data())#將彩色畫面轉換Numpy陣列 img_depth = np.asanyarray(aligned_depth_frame.get_data())#將深度畫面轉換為Numpy陣列 return color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame#返回內部參數,彩色圖像,深度圖像及深度畫面 def get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin): #將輸入的深度影像座標分解為x,y部份 x = depth_pixel[0] y = depth_pixel[1] # 確保像素座標在有效範圍內 if x < 0 or x >= aligned_depth_frame.width or y < 0 or y >= aligned_depth_frame.height: print("無效的像素座標。") return 0, (0, 0, 0) #運用rs SDK get_distance方式取得指定像素深度值 dis = aligned_depth_frame.get_distance(x, y) camera_coordinate = rs.rs2_deproject_pixel_to_point(depth_intrin, depth_pixel, dis)#運用rs SDK rs2_dep....函數將2D像素座標轉換到3D相機座標系中的點 return dis, camera_coordinate def mouse_callback(event, x, y, flags, param): if event == cv2.EVENT_LBUTTONDOWN: print("滑鼠點擊事件座標 (x, y):", x, y) depth_pixel = [x, y] dis, camera_coordinate = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) print ('深度: ',dis) #print ('camera_coordinate: ',camera_coordinate) #初始化幀率及YOLO模型 fps = 0.0 yolo = YOLO() #主迴圈 if __name__=="__main__": # 確定此程式作為主程式執行 while True: # 持續執行直到使用者退出 start_time = time.time() # 紀錄程式開始的時間 t1 = time.time() # 用來計算 FPS 的時間標記 color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame = get_aligned_images() # 獲取對齊的影像和內部參數 frame = cv2.cvtColor(img_color,cv2.COLOR_BGR2RGB) # 將 BGR 圖像轉換成 RGB frame = Image.fromarray(np.uint8(frame)) # 將 numpy 陣列轉換成 Image 物件 image, out_boxes = yolo.detect_image(frame) # 使用 YOLO 偵測物件,回傳影像和邊界框 frame = np.array(image) # 將 Image 物件轉換回 numpy 陣列 frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR) # 將 RGB 圖像轉換回 BGR # 遍歷所有偵測到的邊界框 # 在遍歷邊界框之前,初始化變數來儲存最遠和最近的深度 min_depth_val = float('inf') max_depth_val = float('-inf') min_depth_point = None max_depth_point = None center_points = [] for top, left, bottom, right in out_boxes: min_depth = float('inf') # 初始化最小深度為無窮大 max_depth = float('-inf') # 初始化最大深度為無窮小 min_depth_pixel = None # 初始化最小深度的像素點為 None max_depth_pixel = None # 初始化最大深度的像素點為 None # 遍歷邊界框內的所有像素 for y in range(int(top), int(bottom)): for x in range(int(left), int(right)): depth_pixel = [x, y] # 建立像素座標 dis, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) # 獲取像素點的深度 # 更新最小和最大深度 if dis > 0: if dis < min_depth: min_depth = dis min_depth_pixel = depth_pixel if dis > max_depth: max_depth = dis max_depth_pixel = depth_pixel if min_depth_pixel is not None: min_depth_val = min_depth min_depth_point = min_depth_pixel if max_depth_pixel is not None: max_depth_val = max_depth max_depth_point = max_depth_pixel # 計算邊界框的中心點座標 center_x = int((left + right) / 2) center_y = int((top + bottom) / 2) depth_pixel = [center_x, center_y] center_depth, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) # 獲取中心點的深度 # 印出最小、最大和中心深度的資訊 print(f"Minimum depth at: {min_depth_pixel}, value: {min_depth}") print(f"Maximum depth at: {max_depth_pixel}, value: {max_depth}") print(f"Center depth at: {depth_pixel}, value: {center_depth}") # 在畫面上標示中心點及最大最小深度值點 cv2.circle(frame, (center_x, center_y), 5, GREEN_COLOR, -1) cv2.circle(frame, (min_depth_point[0], min_depth_point[1]), 5, RED_COLOR, -1) cv2.circle(frame, (max_depth_point[0], max_depth_point[1]), 5, YELLOW_COLOR, -1) center_points.append((center_x, center_y, center_depth)) #檢查是否有至少兩個偵測到的邊界框,並計算兩個中心點之間的距離 if len(out_boxes) >= 2: 
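            # 補充說明(假設性的延伸作法)：下方的 distance_x / distance_y 只是兩個中心點
            # 在彩色影像上的「像素」差值；若想得到以公尺為單位的實際 3D 距離，可利用
            # get_3d_camera_coordinate 回傳的 camera_coordinate(目前以 _ 丟棄)，例如：
            #   _, p1 = get_3d_camera_coordinate([center_x1, center_y1], aligned_depth_frame, depth_intrin)
            #   _, p2 = get_3d_camera_coordinate([center_x2, center_y2], aligned_depth_frame, depth_intrin)
            #   dist_3d = np.linalg.norm(np.array(p1) - np.array(p2))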
center_x1, center_y1, center_depth1 = center_points[0] center_x2, center_y2, center_depth2 = center_points[1] # 計算兩個中心點之間的距離 distance_x = center_x2 - center_x1 distance_y = center_y2 - center_y1 print(f"Center points distance (x, y): ({distance_x}, {distance_y})") cv2.setMouseCallback('RealSence', mouse_callback) # 設定滑鼠回調函數,用來處理滑鼠事件 # 在畫面上顯示 FPS frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) # 計算並更新 FPS fps = ( fps + (1./(time.time()-t1)) ) / 2 print("fps= %.2f"%(fps)) # 印出 FPS cv2.imshow('RealSence',frame) # 顯示處理後的畫面 key = cv2.waitKey(1)&0xff # 讀取使用者按下的按鍵 # 計算並印出執行時間 elapsed_time = time.time() - start_time print("Time: %.2f seconds" % elapsed_time) # 如果使用者按下 'q',則跳出迴圈 if key == ord('q'): break # 如果使用者按下 's',則等待使用者再次按下按鍵 if key == ord('s'): cv2.waitKey(0) pipeline.stop() cv2.destroyAllWindows() ``` {%youtube 1c-pVENgkVQ%} ## 改善fps ```python= #匯入必要的函式庫 import pyrealsense2 as rs import numpy as np import cv2 import time import tensorflow as tf from PIL import Image from yolo import YOLO, YOLO_ONNX #獲取系統上所有GPU裝置 gpus = tf.config.experimental.list_physical_devices(device_type='GPU') for gpu in gpus: #為每個GPU啟用記憶體增長 #可以避免Tensorflow一開始就使用所有的GPU記憶體 #而是按照需增加GPU記憶體使用量 tf.config.experimental.set_memory_growth(gpu, True) GREEN_COLOR = (0, 255, 0) RED_COLOR = (0, 0, 255) YELLOW_COLOR = (0, 255, 255) #創建並啟動realsense pipeline pipeline = rs.pipeline() config = rs.config() #運用深度及彩色串流 config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 15) config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 15) #啟動pipline pipe_profile = pipeline.start(config) #指定對齊目標為彩色串流 align_to = rs.stream.color #將深度串流對齊至彩色串流 align = rs.align(align_to) #創見一個視窗名為RealSense cv2.namedWindow('RealSence',0) def get_aligned_images(): frames = pipeline.wait_for_frames()#從realsense piplin獲取一組新的畫面 aligned_frames = align.process(frames)#將深度影像對齊至彩色影像,並返回一組新的已對齊畫面 aligned_depth_frame = aligned_frames.get_depth_frame()#從已對齊畫面取得深度畫面 aligned_color_frame = aligned_frames.get_color_frame()#從已對齊畫面取得彩色畫面 depth_intrin = aligned_depth_frame.profile.as_video_stream_profile().intrinsics#獲取深度畫面的內部參數,如:主座標 color_intrin = aligned_color_frame.profile.as_video_stream_profile().intrinsics#獲取彩色畫面的內部參數 img_color = np.asanyarray(aligned_color_frame.get_data())#將彩色畫面轉換Numpy陣列 img_depth = np.asanyarray(aligned_depth_frame.get_data())#將深度畫面轉換為Numpy陣列 # 將彩色圖像調整為較小的解析度 resized_color = cv2.resize(img_color, (320, 240)) return color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame, resized_color#返回內部參數,彩色圖像,深度圖像及深度畫面 def get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin): #將輸入的深度影像座標分解為x,y部份 x = depth_pixel[0] y = depth_pixel[1] # 確保像素座標在有效範圍內 if x < 0 or x >= aligned_depth_frame.width or y < 0 or y >= aligned_depth_frame.height: print("無效的像素座標。") return 0, (0, 0, 0) #運用rs SDK get_distance方式取得指定像素深度值 dis = aligned_depth_frame.get_distance(x, y) camera_coordinate = rs.rs2_deproject_pixel_to_point(depth_intrin, depth_pixel, dis)#運用rs SDK rs2_dep....函數將2D像素座標轉換到3D相機座標系中的點 return dis, camera_coordinate def mouse_callback(event, x, y, flags, param): if event == cv2.EVENT_LBUTTONDOWN: print("滑鼠點擊事件座標 (x, y):", x, y) depth_pixel = [x, y] dis, camera_coordinate = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) print ('深度: ',dis) #print ('camera_coordinate: ',camera_coordinate) #初始化幀率及YOLO模型 fps = 0.0 yolo = YOLO() yolo.confidence = 0.3 frame_skip = 5 skip_counter = 0 #主迴圈 if __name__=="__main__": # 確定此程式作為主程式執行 while True: # 持續執行直到使用者退出 start_time = 
time.time() # 紀錄程式開始的時間 t1 = time.time() color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame , resized_color= get_aligned_images() # 獲取對齊的影像和內部參數 frame = cv2.cvtColor(img_color,cv2.COLOR_BGR2RGB) # 將 BGR 圖像轉換成 RGB frame = Image.fromarray(np.uint8(frame)) # 將 numpy 陣列轉換成 Image 物件 image, out_boxes = yolo.detect_image(frame) # 使用 YOLO 偵測物件,回傳影像和邊界框 frame = np.array(image) # 將 Image 物件轉換回 numpy 陣列 frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR) # 將 RGB 圖像轉換回 BGR # 遍歷所有偵測到的邊界框 # 在遍歷邊界框之前,初始化變數來儲存最遠和最近的深度 min_depth_val = float('inf') max_depth_val = float('-inf') min_depth_point = None max_depth_point = None center_points = [] for top, left, bottom, right in out_boxes: min_depth = float('inf') # 初始化最小深度為無窮大 max_depth = float('-inf') # 初始化最大深度為無窮小 min_depth_pixel = None # 初始化最小深度的像素點為 None max_depth_pixel = None # 初始化最大深度的像素點為 None # 遍歷邊界框內的所有像素 for y in range(int(top), int(bottom)): for x in range(int(left), int(right)): depth_pixel = [x, y] # 建立像素座標 dis, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) # 獲取像素點的深度 # 更新最小和最大深度 if dis > 0: if dis < min_depth: min_depth = dis min_depth_pixel = depth_pixel if dis > max_depth: max_depth = dis max_depth_pixel = depth_pixel if min_depth_pixel is not None: min_depth_val = min_depth min_depth_point = min_depth_pixel if max_depth_pixel is not None: max_depth_val = max_depth max_depth_point = max_depth_pixel # 計算邊界框的中心點座標 center_x = int((left + right) / 2) center_y = int((top + bottom) / 2) depth_pixel = [center_x, center_y] center_depth, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) # 獲取中心點的深度 # 印出最小、最大和中心深度的資訊 print(f"Minimum depth at: {min_depth_pixel}, value: {min_depth}") print(f"Maximum depth at: {max_depth_pixel}, value: {max_depth}") print(f"Center depth at: {depth_pixel}, value: {center_depth}") # 在畫面上標示中心點及最大最小深度值點 cv2.circle(frame, (center_x, center_y), 5, GREEN_COLOR, -1) cv2.circle(frame, (min_depth_point[0], min_depth_point[1]), 5, RED_COLOR, -1) cv2.circle(frame, (max_depth_point[0], max_depth_point[1]), 5, YELLOW_COLOR, -1) center_points.append((center_x, center_y, center_depth)) #檢查是否有至少兩個偵測到的邊界框,並計算兩個中心點之間的距離 if len(out_boxes) >= 2: center_x1, center_y1, center_depth1 = center_points[0] center_x2, center_y2, center_depth2 = center_points[1] # 計算兩個中心點之間的距離 distance_x = center_x2 - center_x1 distance_y = center_y2 - center_y1 print(f"Center points distance (x, y): ({distance_x}, {distance_y})") cv2.setMouseCallback('RealSence', mouse_callback) # 設定滑鼠回調函數,用來處理滑鼠事件 # 在畫面上顯示 FPS frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) # 計算並更新 FPS fps = ( fps + (1./(time.time()-t1)) ) / 2 print("fps= %.2f"%(fps)) # 印出 FPS cv2.imshow('RealSence',frame) # 顯示處理後的畫面 key = cv2.waitKey(1)&0xff # 讀取使用者按下的按鍵 # 計算並印出執行時間 elapsed_time = time.time() - start_time print("Time: %.2f seconds" % elapsed_time) # 如果使用者按下 'q',則跳出迴圈 if key == ord('q'): break # 如果使用者按下 's',則等待使用者再次按下按鍵 if key == ord('s'): cv2.waitKey(0) pipeline.stop() cv2.destroyAllWindows() ``` ## ==運用抓取物體輪廓、邊緣技術來得知物體最大最小深度== ```python= #匯入必要的函式庫 import pyrealsense2 as rs import numpy as np import cv2 import time import tensorflow as tf from PIL import Image from yolo import YOLO, YOLO_ONNX #獲取系統上所有GPU裝置 gpus = tf.config.experimental.list_physical_devices(device_type='GPU') for gpu in gpus: #為每個GPU啟用記憶體增長 #可以避免Tensorflow一開始就使用所有的GPU記憶體 #而是按照需增加GPU記憶體使用量 tf.config.experimental.set_memory_growth(gpu, True) GREEN_COLOR = (0, 255, 0) RED_COLOR = (0, 0, 255) YELLOW_COLOR = (0, 255, 
255) #創建並啟動realsense pipeline pipeline = rs.pipeline() config = rs.config() #運用深度及彩色串流 config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 15) config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 15) #啟動pipline pipe_profile = pipeline.start(config) #指定對齊目標為彩色串流 align_to = rs.stream.color #將深度串流對齊至彩色串流 align = rs.align(align_to) #創見一個視窗名為RealSense cv2.namedWindow('RealSence',0) def get_aligned_images(): frames = pipeline.wait_for_frames()#從realsense piplin獲取一組新的畫面 aligned_frames = align.process(frames)#將深度影像對齊至彩色影像,並返回一組新的已對齊畫面 aligned_depth_frame = aligned_frames.get_depth_frame()#從已對齊畫面取得深度畫面 aligned_color_frame = aligned_frames.get_color_frame()#從已對齊畫面取得彩色畫面 depth_intrin = aligned_depth_frame.profile.as_video_stream_profile().intrinsics#獲取深度畫面的內部參數,如:主座標 color_intrin = aligned_color_frame.profile.as_video_stream_profile().intrinsics#獲取彩色畫面的內部參數 img_color = np.asanyarray(aligned_color_frame.get_data())#將彩色畫面轉換Numpy陣列 img_depth = np.asanyarray(aligned_depth_frame.get_data())#將深度畫面轉換為Numpy陣列 # 將彩色圖像調整為較小的解析度 resized_color = cv2.resize(img_color, (320, 240)) return color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame, resized_color#返回內部參數,彩色圖像,深度圖像及深度畫面 def get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin): #將輸入的深度影像座標分解為x,y部份 x = depth_pixel[0] y = depth_pixel[1] # 確保像素座標在有效範圍內 if x < 0 or x >= aligned_depth_frame.width or y < 0 or y >= aligned_depth_frame.height: print("無效的像素座標。") return 0, (0, 0, 0) #運用rs SDK get_distance方式取得指定像素深度值 dis = aligned_depth_frame.get_distance(x, y) camera_coordinate = rs.rs2_deproject_pixel_to_point(depth_intrin, depth_pixel, dis)#運用rs SDK rs2_dep....函數將2D像素座標轉換到3D相機座標系中的點 return dis, camera_coordinate def mouse_callback(event, x, y, flags, param): if event == cv2.EVENT_LBUTTONDOWN: print("滑鼠點擊事件座標 (x, y):", x, y) depth_pixel = [x, y] dis, camera_coordinate = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) print ('深度: ',dis) #print ('camera_coordinate: ',camera_coordinate) #初始化幀率及YOLO模型 fps = 0.0 yolo = YOLO() yolo.confidence = 0.3 frame_skip = 5 skip_counter = 0 #主迴圈 if __name__=="__main__": # 確定此程式作為主程式執行 while True: # 持續執行直到使用者退出 start_time = time.time() # 紀錄程式開始的時間 t1 = time.time() color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame , resized_color= get_aligned_images() # 獲取對齊的影像和內部參數 frame = cv2.cvtColor(img_color,cv2.COLOR_BGR2RGB) # 將 BGR 圖像轉換成 RGB frame = Image.fromarray(np.uint8(frame)) # 將 numpy 陣列轉換成 Image 物件 image, out_boxes = yolo.detect_image(frame) # 使用 YOLO 偵測物件,回傳影像和邊界框 frame = np.array(image) # 將 Image 物件轉換回 numpy 陣列 frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR) # 將 RGB 圖像轉換回 BGR # 遍歷所有偵測到的邊界框 # 在遍歷邊界框之前,初始化變數來儲存最遠和最近的深度 min_depth_val = float('inf') max_depth_val = float('-inf') min_depth_point = None max_depth_point = None center_points = [] for top, left, bottom, right in out_boxes: roi_color = img_color[int(top):int(bottom), int(left):int(right)] roi_gray = cv2.cvtColor(roi_color, cv2.COLOR_BGR2GRAY) edges = cv2.Canny(roi_gray, 50, 150) min_depth = float('inf') # 初始化最小深度為無窮大 max_depth = float('-inf') # 初始化最大深度為無窮小 min_depth_pixel = None # 初始化最小深度的像素點為 None max_depth_pixel = None # 初始化最大深度的像素點為 None # 遍歷邊界框內的所有像素 edge_points = np.argwhere(edges > 0) for point in edge_points: y, x = point depth_pixel = [int(left) + x, int(top) + y] dis, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) # 更新最小和最大深度 if dis > 0: if dis < min_depth: min_depth = dis min_depth_pixel = depth_pixel if dis > max_depth: max_depth = dis 
max_depth_pixel = depth_pixel if min_depth_pixel is not None: cv2.circle(frame, (min_depth_pixel[0], min_depth_pixel[1]), 5, RED_COLOR, -1) if max_depth_pixel is not None: cv2.circle(frame, (max_depth_pixel[0], max_depth_pixel[1]), 5, YELLOW_COLOR, -1) # 計算邊界框的中心點座標 center_x = int((left + right) / 2) center_y = int((top + bottom) / 2) depth_pixel = [center_x, center_y] center_depth, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) # 獲取中心點的深度 # 印出最小、最大和中心深度的資訊 print(f"Minimum depth at: {min_depth_pixel}, value: {min_depth}") print(f"Maximum depth at: {max_depth_pixel}, value: {max_depth}") print(f"Center depth at: {depth_pixel}, value: {center_depth}") # 在畫面上標示中心點及最大最小深度值點 cv2.circle(frame, (center_x, center_y), 5, GREEN_COLOR, -1) center_points.append((center_x, center_y, center_depth)) #檢查是否有至少兩個偵測到的邊界框,並計算兩個中心點之間的距離 if len(out_boxes) >= 2: center_x1, center_y1, center_depth1 = center_points[0] center_x2, center_y2, center_depth2 = center_points[1] # 計算兩個中心點之間的距離 distance_x = center_x2 - center_x1 distance_y = center_y2 - center_y1 print(f"Center points distance (x, y): ({distance_x}, {distance_y})") cv2.setMouseCallback('RealSence', mouse_callback) # 設定滑鼠回調函數,用來處理滑鼠事件 # 在畫面上顯示 FPS frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) # 計算並更新 FPS fps = ( fps + (1./(time.time()-t1)) ) / 2 print("fps= %.2f"%(fps)) # 印出 FPS cv2.imshow('RealSence',frame) # 顯示處理後的畫面 key = cv2.waitKey(1)&0xff # 讀取使用者按下的按鍵 # 計算並印出執行時間 elapsed_time = time.time() - start_time print("Time: %.2f seconds" % elapsed_time) # 如果使用者按下 'q',則跳出迴圈 if key == ord('q'): break # 如果使用者按下 's',則等待使用者再次按下按鍵 if key == ord('s'): cv2.waitKey(0) pipeline.stop() cv2.destroyAllWindows() ``` {%youtube MipJxfCVfQM%} ## ==運用前後景分離方式取得物體最大最小的點及中心值得點== ```python= #匯入必要的函式庫 import pyrealsense2 as rs import numpy as np import cv2 import time import tensorflow as tf from PIL import Image from yolo import YOLO, YOLO_ONNX #獲取系統上所有GPU裝置 gpus = tf.config.experimental.list_physical_devices(device_type='GPU') for gpu in gpus: #為每個GPU啟用記憶體增長 #可以避免Tensorflow一開始就使用所有的GPU記憶體 #而是按照需增加GPU記憶體使用量 tf.config.experimental.set_memory_growth(gpu, True) GREEN_COLOR = (0, 255, 0) RED_COLOR = (0, 0, 255) YELLOW_COLOR = (0, 255, 255) #創建並啟動realsense pipeline pipeline = rs.pipeline() config = rs.config() #運用深度及彩色串流 config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 15) config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 15) #啟動pipline pipe_profile = pipeline.start(config) #指定對齊目標為彩色串流 align_to = rs.stream.color #將深度串流對齊至彩色串流 align = rs.align(align_to) #創見一個視窗名為RealSense cv2.namedWindow('RealSence',0) def get_aligned_images(): frames = pipeline.wait_for_frames()#從realsense piplin獲取一組新的畫面 aligned_frames = align.process(frames)#將深度影像對齊至彩色影像,並返回一組新的已對齊畫面 aligned_depth_frame = aligned_frames.get_depth_frame()#從已對齊畫面取得深度畫面 aligned_color_frame = aligned_frames.get_color_frame()#從已對齊畫面取得彩色畫面 depth_intrin = aligned_depth_frame.profile.as_video_stream_profile().intrinsics#獲取深度畫面的內部參數,如:主座標 color_intrin = aligned_color_frame.profile.as_video_stream_profile().intrinsics#獲取彩色畫面的內部參數 img_color = np.asanyarray(aligned_color_frame.get_data())#將彩色畫面轉換Numpy陣列 img_depth = np.asanyarray(aligned_depth_frame.get_data())#將深度畫面轉換為Numpy陣列 # 將彩色圖像調整為較小的解析度 resized_color = cv2.resize(img_color, (320, 240)) return color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame, resized_color#返回內部參數,彩色圖像,深度圖像及深度畫面 def get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin): 
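    """補充說明：回傳 (dis, camera_coordinate)。

    depth_pixel 為對齊後影像上的 [x, y] 像素座標；dis 為該點深度，單位公尺，
    0 代表該點沒有有效深度值；camera_coordinate 為 rs2_deproject_pixel_to_point
    反投影出的 [X, Y, Z] 相機座標(單位公尺)。
    """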
#將輸入的深度影像座標分解為x,y部份 x = depth_pixel[0] y = depth_pixel[1] # 確保像素座標在有效範圍內 if x < 0 or x >= aligned_depth_frame.width or y < 0 or y >= aligned_depth_frame.height: print("無效的像素座標。") return 0, (0, 0, 0) #運用rs SDK get_distance方式取得指定像素深度值 dis = aligned_depth_frame.get_distance(x, y) camera_coordinate = rs.rs2_deproject_pixel_to_point(depth_intrin, depth_pixel, dis)#運用rs SDK rs2_dep....函數將2D像素座標轉換到3D相機座標系中的點 return dis, camera_coordinate def mouse_callback(event, x, y, flags, param): if event == cv2.EVENT_LBUTTONDOWN: print("滑鼠點擊事件座標 (x, y):", x, y) depth_pixel = [x, y] dis, camera_coordinate = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) print ('深度: ',dis) #print ('camera_coordinate: ',camera_coordinate) #初始化幀率及YOLO模型 fps = 0.0 yolo = YOLO() yolo.confidence = 0.3 frame_skip = 5 skip_counter = 0 fgbg = cv2.createBackgroundSubtractorMOG2() #主迴圈 if __name__=="__main__": # 確定此程式作為主程式執行 while True: # 持續執行直到使用者退出 start_time = time.time() # 紀錄程式開始的時間 t1 = time.time() color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame , resized_color= get_aligned_images() # 獲取對齊的影像和內部參數 # 進行前後景分離 fgmask = fgbg.apply(img_color) # 將前景mask應用到彩色圖像上,以得到只有前景的彩色圖像 fgimg = cv2.bitwise_and(img_color, img_color, mask=fgmask) # 你可以選擇對fgimg(前景圖像)或img_color(原始圖像)進行後續處理 frame = cv2.cvtColor(fgimg, cv2.COLOR_BGR2RGB) frame = cv2.cvtColor(img_color,cv2.COLOR_BGR2RGB) # 將 BGR 圖像轉換成 RGB frame = Image.fromarray(np.uint8(frame)) # 將 numpy 陣列轉換成 Image 物件 image, out_boxes = yolo.detect_image(frame) # 使用 YOLO 偵測物件,回傳影像和邊界框 frame = np.array(image) # 將 Image 物件轉換回 numpy 陣列 frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR) # 將 RGB 圖像轉換回 BGR # 遍歷所有偵測到的邊界框 # 在遍歷邊界框之前,初始化變數來儲存最遠和最近的深度 min_depth_val = float('inf') max_depth_val = float('-inf') min_depth_point = None max_depth_point = None center_points = [] for top, left, bottom, right in out_boxes: min_depth = float('inf') # 初始化最小深度為無窮大 max_depth = float('-inf') # 初始化最大深度為無窮小 min_depth_pixel = None # 初始化最小深度的像素點為 None max_depth_pixel = None # 初始化最大深度的像素點為 None # 遍歷邊界框內的所有像素 dis = 0 for y in range(int(top), int(bottom)): for x in range(int(left), int(right)): if fgmask[y, x] != 0: depth_pixel = [x, y] # 獲取深度值,這取決於你的深度圖像的表示 dis = aligned_depth_frame.get_distance(x, y) # 只是一個示例,你可能需要自己的深度獲取方法 # 更新最小和最大深度 if dis > 0: if dis < min_depth: min_depth = dis min_depth_pixel = depth_pixel if dis > max_depth: max_depth = dis max_depth_pixel = depth_pixel if min_depth_pixel is not None: min_depth_val = min_depth min_depth_point = min_depth_pixel if max_depth_pixel is not None: max_depth_val = max_depth max_depth_point = max_depth_pixel # 計算邊界框的中心點座標 center_x = int((left + right) / 2) center_y = int((top + bottom) / 2) depth_pixel = [center_x, center_y] center_depth, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) # 獲取中心點的深度 # 印出最小、最大和中心深度的資訊 print(f"Minimum depth at: {min_depth_pixel}, value: {min_depth}") print(f"Maximum depth at: {max_depth_pixel}, value: {max_depth}") print(f"Center depth at: {depth_pixel}, value: {center_depth}") # 在畫面上標示中心點及最大最小深度值點 cv2.circle(frame, (center_x, center_y), 5, GREEN_COLOR, -1) if min_depth_point: cv2.circle(frame, (min_depth_point[0], min_depth_point[1]), 5, RED_COLOR, -1) if max_depth_point: cv2.circle(frame, (max_depth_point[0], max_depth_point[1]), 5, YELLOW_COLOR, -1) center_points.append((center_x, center_y, center_depth)) #檢查是否有至少兩個偵測到的邊界框,並計算兩個中心點之間的距離 if len(out_boxes) >= 2: center_x1, center_y1, center_depth1 = center_points[0] center_x2, center_y2, center_depth2 = center_points[1] # 
計算兩個中心點之間的距離 distance_x = center_x2 - center_x1 distance_y = center_y2 - center_y1 print(f"Center points distance (x, y): ({distance_x}, {distance_y})") cv2.setMouseCallback('RealSence', mouse_callback) # 設定滑鼠回調函數,用來處理滑鼠事件 # 在畫面上顯示 FPS frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) # 計算並更新 FPS fps = ( fps + (1./(time.time()-t1)) ) / 2 print("fps= %.2f"%(fps)) # 印出 FPS cv2.imshow('RealSence',frame) # 顯示處理後的畫面 cv2.imshow('Foreground', fgimg) key = cv2.waitKey(1)&0xff # 讀取使用者按下的按鍵 # 計算並印出執行時間 elapsed_time = time.time() - start_time print("Time: %.2f seconds" % elapsed_time) # 如果使用者按下 'q',則跳出迴圈 if key == ord('q'): break # 如果使用者按下 's',則等待使用者再次按下按鍵 if key == ord('s'): cv2.waitKey(0) pipeline.stop() cv2.destroyAllWindows() ``` {%youtube 4pSuyYCnBjc%} ## 直接顯示出要的內容再右邊視窗 ```python= #匯入必要的函式庫 import pyrealsense2 as rs import numpy as np import cv2 import time import tensorflow as tf from PIL import Image from yolo import YOLO, YOLO_ONNX #獲取系統上所有GPU裝置 gpus = tf.config.experimental.list_physical_devices(device_type='GPU') for gpu in gpus: #為每個GPU啟用記憶體增長 #可以避免Tensorflow一開始就使用所有的GPU記憶體 #而是按照需增加GPU記憶體使用量 tf.config.experimental.set_memory_growth(gpu, True) GREEN_COLOR = (0, 255, 0) RED_COLOR = (0, 0, 255) YELLOW_COLOR = (0, 255, 255) # 創建一個足夠大的畫布 canvas_height, canvas_width = 480, 1280 canvas = np.zeros((canvas_height, canvas_width, 3), dtype=np.uint8) # 設定左半邊和右半邊的區域 left_half = canvas[:, :canvas_width // 2, :] right_half = canvas[:, canvas_width // 2:, :] #創建並啟動realsense pipeline pipeline = rs.pipeline() config = rs.config() #運用深度及彩色串流 config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 15) config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 15) #啟動pipline pipe_profile = pipeline.start(config) #指定對齊目標為彩色串流 align_to = rs.stream.color #將深度串流對齊至彩色串流 align = rs.align(align_to) #創見一個視窗名為RealSense cv2.namedWindow('RealSence',0) def get_aligned_images(): frames = pipeline.wait_for_frames()#從realsense piplin獲取一組新的畫面 aligned_frames = align.process(frames)#將深度影像對齊至彩色影像,並返回一組新的已對齊畫面 aligned_depth_frame = aligned_frames.get_depth_frame()#從已對齊畫面取得深度畫面 aligned_color_frame = aligned_frames.get_color_frame()#從已對齊畫面取得彩色畫面 depth_intrin = aligned_depth_frame.profile.as_video_stream_profile().intrinsics#獲取深度畫面的內部參數,如:主座標 color_intrin = aligned_color_frame.profile.as_video_stream_profile().intrinsics#獲取彩色畫面的內部參數 img_color = np.asanyarray(aligned_color_frame.get_data())#將彩色畫面轉換Numpy陣列 img_depth = np.asanyarray(aligned_depth_frame.get_data())#將深度畫面轉換為Numpy陣列 # 將彩色圖像調整為較小的解析度 resized_color = cv2.resize(img_color, (320, 240)) return color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame, resized_color#返回內部參數,彩色圖像,深度圖像及深度畫面 def get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin): #將輸入的深度影像座標分解為x,y部份 x = depth_pixel[0] y = depth_pixel[1] # 確保像素座標在有效範圍內 if x < 0 or x >= aligned_depth_frame.width or y < 0 or y >= aligned_depth_frame.height: print("無效的像素座標。") return 0, (0, 0, 0) #運用rs SDK get_distance方式取得指定像素深度值 dis = aligned_depth_frame.get_distance(x, y) camera_coordinate = rs.rs2_deproject_pixel_to_point(depth_intrin, depth_pixel, dis)#運用rs SDK rs2_dep....函數將2D像素座標轉換到3D相機座標系中的點 return dis, camera_coordinate def mouse_callback(event, x, y, flags, param): if event == cv2.EVENT_LBUTTONDOWN: print("滑鼠點擊事件座標 (x, y):", x, y) depth_pixel = [x, y] dis, camera_coordinate = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) print ('深度: ',dis) #print ('camera_coordinate: ',camera_coordinate) #初始化幀率及YOLO模型 fps = 

## Show the desired information directly in the right-hand window

```python=
# Import the required libraries
import pyrealsense2 as rs
import numpy as np
import cv2
import time
import tensorflow as tf
from PIL import Image
from yolo import YOLO, YOLO_ONNX

# List all GPU devices on the system
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
    # Enable memory growth for each GPU so TensorFlow does not grab all GPU
    # memory up front but allocates it incrementally as needed
    tf.config.experimental.set_memory_growth(gpu, True)

GREEN_COLOR = (0, 255, 0)
RED_COLOR = (0, 0, 255)
YELLOW_COLOR = (0, 255, 255)

# Create a canvas large enough to hold two views side by side
canvas_height, canvas_width = 480, 1280
canvas = np.zeros((canvas_height, canvas_width, 3), dtype=np.uint8)
# Split the canvas into a left half and a right half
left_half = canvas[:, :canvas_width // 2, :]
right_half = canvas[:, canvas_width // 2:, :]

# Create and start the RealSense pipeline
pipeline = rs.pipeline()
config = rs.config()
# Enable the depth and color streams
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 15)
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 15)
# Start the pipeline
pipe_profile = pipeline.start(config)
# Align the depth stream to the color stream
align_to = rs.stream.color
align = rs.align(align_to)
# Create a window named 'RealSense'
cv2.namedWindow('RealSense', 0)

def get_aligned_images():
    frames = pipeline.wait_for_frames()            # grab a new set of frames from the RealSense pipeline
    aligned_frames = align.process(frames)         # align the depth frame to the color frame
    aligned_depth_frame = aligned_frames.get_depth_frame()   # aligned depth frame
    aligned_color_frame = aligned_frames.get_color_frame()   # aligned color frame
    depth_intrin = aligned_depth_frame.profile.as_video_stream_profile().intrinsics  # depth intrinsics (e.g. principal point)
    color_intrin = aligned_color_frame.profile.as_video_stream_profile().intrinsics  # color intrinsics
    img_color = np.asanyarray(aligned_color_frame.get_data())   # color frame as a NumPy array
    img_depth = np.asanyarray(aligned_depth_frame.get_data())   # depth frame as a NumPy array
    # Resize the color image to a smaller resolution
    resized_color = cv2.resize(img_color, (320, 240))
    return color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame, resized_color

def get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin):
    # Split the input pixel coordinate into its x and y parts
    x = depth_pixel[0]
    y = depth_pixel[1]
    # Make sure the pixel coordinate is within the valid range
    if x < 0 or x >= aligned_depth_frame.width or y < 0 or y >= aligned_depth_frame.height:
        print("Invalid pixel coordinate.")
        return 0, (0, 0, 0)
    # Use the RealSense SDK get_distance() to read the depth at the given pixel
    dis = aligned_depth_frame.get_distance(x, y)
    # Use rs2_deproject_pixel_to_point() to map the 2D pixel to a 3D point in the camera frame
    camera_coordinate = rs.rs2_deproject_pixel_to_point(depth_intrin, depth_pixel, dis)
    return dis, camera_coordinate

def mouse_callback(event, x, y, flags, param):
    if event == cv2.EVENT_LBUTTONDOWN:
        print("Mouse click at (x, y):", x, y)
        depth_pixel = [x, y]
        dis, camera_coordinate = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin)
        print('Depth: ', dis)   # depth is in meters
        #print('camera_coordinate: ', camera_coordinate)

# Initialize the frame rate and the YOLO model
fps = 0.0
yolo = YOLO()
yolo.confidence = 0.3
frame_skip = 5
skip_counter = 0

# Main loop
if __name__ == "__main__":          # run only when executed as the main program
    while True:                     # keep running until the user quits
        start_time = time.time()    # record the start time of this iteration
        t1 = time.time()
        # Get the aligned images and the camera intrinsics
        color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame, resized_color = get_aligned_images()
        frame = cv2.cvtColor(img_color, cv2.COLOR_BGR2RGB)   # convert the BGR image to RGB
        frame = Image.fromarray(np.uint8(frame))             # convert the NumPy array to a PIL Image
        image, out_boxes = yolo.detect_image(frame)          # run YOLO detection; returns the annotated image and the bounding boxes
        frame = np.array(image)                              # convert the Image back to a NumPy array
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        # Iterate over the detected bounding boxes and write their labels onto the canvas halves
        # Show the detection results on the left half
        for i, (top, left, bottom, right) in enumerate(out_boxes):
            object_class = yolo.class_names[i]   # look up the object class (indexed by box order here)
            text = f"Object {i + 1}: {object_class}"
            cv2.putText(left_half, text, (50, 50 + i * 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
        # Show the object names on the right half
        for i, (top, left, bottom, right) in enumerate(out_boxes):
            object_class = yolo.class_names[i]
            text = f"Object {i + 1}: {object_class}"
            cv2.putText(right_half, text, (50, 50 + i * 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

        # Copy the detection frame into the left half (note: this overwrites the text drawn on left_half above)
        left_half[:, :, :] = frame

        # Iterate over all detected bounding boxes
        # Before the loop, initialize variables that track the nearest and farthest depths
        min_depth_val = float('inf')
        max_depth_val = float('-inf')
        min_depth_point = None
        max_depth_point = None
        center_points = []
        for top, left, bottom, right in out_boxes:
            min_depth = float('inf')     # minimum depth, initialized to infinity
            max_depth = float('-inf')    # maximum depth, initialized to negative infinity
            min_depth_pixel = None       # pixel with the minimum depth
            max_depth_pixel = None       # pixel with the maximum depth
            # Walk over every pixel inside the bounding box
            for y in range(int(top), int(bottom)):
                for x in range(int(left), int(right)):
                    depth_pixel = [x, y]
                    dis, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin)  # depth at this pixel
                    # Update the minimum and maximum depths
                    if dis > 0:
                        if dis < min_depth:
                            min_depth = dis
                            min_depth_pixel = depth_pixel
                        if dis > max_depth:
                            max_depth = dis
                            max_depth_pixel = depth_pixel
            if min_depth_pixel is not None:
                min_depth_val = min_depth
                min_depth_point = min_depth_pixel
            if max_depth_pixel is not None:
                max_depth_val = max_depth
                max_depth_point = max_depth_pixel
            # Compute the center point of the bounding box
            center_x = int((left + right) / 2)
            center_y = int((top + bottom) / 2)
            depth_pixel = [center_x, center_y]
            center_depth, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin)  # depth at the center point
            # Print the minimum, maximum and center depths
            print(f"Minimum depth at: {min_depth_pixel}, value: {min_depth}")
            print(f"Maximum depth at: {max_depth_pixel}, value: {max_depth}")
            print(f"Center depth at: {depth_pixel}, value: {center_depth}")
            # Mark the center point and the minimum / maximum depth points on the frame
            cv2.circle(frame, (center_x, center_y), 5, GREEN_COLOR, -1)
            cv2.circle(frame, (min_depth_point[0], min_depth_point[1]), 5, RED_COLOR, -1)
            cv2.circle(frame, (max_depth_point[0], max_depth_point[1]), 5, YELLOW_COLOR, -1)
            center_points.append((center_x, center_y, center_depth))

        # If at least two boxes were detected, compute the distance between the first two center points
        if len(out_boxes) >= 2:
            center_x1, center_y1, center_depth1 = center_points[0]
            center_x2, center_y2, center_depth2 = center_points[1]
            # Distance between the two center points (in pixels)
            distance_x = center_x2 - center_x1
            distance_y = center_y2 - center_y1
            print(f"Center points distance (x, y): ({distance_x}, {distance_y})")

        cv2.setMouseCallback('RealSense', mouse_callback)  # register the mouse callback that handles click events
        # Draw the FPS on the frame
        frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        # Compute and update the FPS
        fps = ( fps + (1./(time.time()-t1)) ) / 2
        print("fps= %.2f"%(fps))            # print the FPS
        cv2.imshow('RealSense', canvas)     # show the composed canvas
        key = cv2.waitKey(1) & 0xff         # read the key pressed by the user
        # Compute and print the elapsed time
        elapsed_time = time.time() - start_time
        print("Time: %.2f seconds" % elapsed_time)
        # Break out of the loop when 'q' is pressed
        if key == ord('q'):
            break
        # Pause until another key is pressed when 's' is pressed
        if key == ord('s'):
            cv2.waitKey(0)
    pipeline.stop()
    cv2.destroyAllWindows()
```

## Display the simple left/right relationships

```python=
# Import the required libraries
import pyrealsense2 as rs
import numpy as np
import cv2
import time
import tensorflow as tf
from PIL import Image
from yolo import YOLO, YOLO_ONNX

# List all GPU devices on the system
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
    # Enable memory growth for each GPU so TensorFlow does not grab all GPU
    # memory up front but allocates it incrementally as needed
    tf.config.experimental.set_memory_growth(gpu, True)

GREEN_COLOR = (0, 255, 0)
RED_COLOR = (0, 0, 255)
YELLOW_COLOR = (0, 255, 255)

# Use a larger canvas this time; adjust the size as needed
canvas_height, canvas_width = 720, 1800
# Create a canvas large enough to hold the views
canvas = np.zeros((canvas_height, canvas_width, 3), dtype=np.uint8)
# Left half of the canvas
left_half = canvas[:, :canvas_width // 2, :]
# Upper and lower parts of the right half
top_half = canvas[:canvas_height // 2, canvas_width // 2:, :]
bottom_half = canvas[canvas_height // 2:, canvas_width // 2:, :]
displayed_relations = set()

# Create and start the RealSense pipeline
pipeline = rs.pipeline()
config = rs.config()
# Enable the depth and color streams
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 15)
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 15)
# Start the pipeline
pipe_profile = pipeline.start(config)
# Align the depth stream to the color stream
align_to = rs.stream.color
align = rs.align(align_to)
# Create a window named 'RealSense'
cv2.namedWindow('RealSense', 0)

def get_aligned_images():
    frames = pipeline.wait_for_frames()            # grab a new set of frames from the RealSense pipeline
    aligned_frames = align.process(frames)         # align the depth frame to the color frame
    aligned_depth_frame = aligned_frames.get_depth_frame()   # aligned depth frame
    aligned_color_frame = aligned_frames.get_color_frame()   # aligned color frame
    depth_intrin = aligned_depth_frame.profile.as_video_stream_profile().intrinsics  # depth intrinsics (e.g. principal point)
    color_intrin = aligned_color_frame.profile.as_video_stream_profile().intrinsics  # color intrinsics
    img_color = np.asanyarray(aligned_color_frame.get_data())   # color frame as a NumPy array
    img_depth = np.asanyarray(aligned_depth_frame.get_data())   # depth frame as a NumPy array
    # Resize the color image to a smaller resolution
    resized_color = cv2.resize(img_color, (320, 240))
    return color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame, resized_color

def get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin):
    # Split the input pixel coordinate into its x and y parts
    x = depth_pixel[0]
    y = depth_pixel[1]
    # Make sure the pixel coordinate is within the valid range
    if x < 0 or x >= aligned_depth_frame.width or y < 0 or y >= aligned_depth_frame.height:
        print("Invalid pixel coordinate.")
        return 0, (0, 0, 0)
    # Use the RealSense SDK get_distance() to read the depth at the given pixel
    dis = aligned_depth_frame.get_distance(x, y)
    # Use rs2_deproject_pixel_to_point() to map the 2D pixel to a 3D point in the camera frame
    camera_coordinate = rs.rs2_deproject_pixel_to_point(depth_intrin, depth_pixel, dis)
    return dis, camera_coordinate

def mouse_callback(event, x, y, flags, param):
    if event == cv2.EVENT_LBUTTONDOWN:
        print("Mouse click at (x, y):", x, y)
        depth_pixel = [x, y]
        dis, camera_coordinate = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin)
        print('Depth: ', dis)   # depth is in meters
        #print('camera_coordinate: ', camera_coordinate)

# Initialize the frame rate and the YOLO model
fps = 0.0
yolo = YOLO()
yolo.confidence = 0.3
frame_skip = 5
skip_counter = 0

# Main loop
if __name__ == "__main__":          # run only when executed as the main program
    while True:                     # keep running until the user quits
        start_time = time.time()    # record the start time of this iteration
        t1 = time.time()
        # Get the aligned images and the camera intrinsics
        color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame, resized_color = get_aligned_images()
        frame = cv2.cvtColor(img_color, cv2.COLOR_BGR2RGB)   # convert the BGR image to RGB
        frame = Image.fromarray(np.uint8(frame))             # convert the NumPy array to a PIL Image
        image, out_boxes = yolo.detect_image(frame)          # run YOLO detection; returns the annotated image and the bounding boxes
        frame = np.array(image)                              # convert the Image back to a NumPy array
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        # Iterate over the detected bounding boxes and write their labels onto the canvas regions
        # Show the detection results on the left half
        for i, (top, left, bottom, right) in enumerate(out_boxes):
            object_class = yolo.class_names[i]   # look up the object class (indexed by box order here)
            text = f"Object {i + 1}: {object_class}"
            cv2.putText(left_half, text, (50, 50 + i * 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
        # Show the object names on the upper part of the right half
        for i, (top, left, bottom, right) in enumerate(out_boxes):
            object_class = yolo.class_names[i]
            text = f"Object {i + 1}: {object_class}"
            cv2.putText(top_half, text, (50, 50 + i * 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

        # Resize the frame to fit left_half, then copy it in
        # (note: this overwrites the text drawn on left_half above)
        frame = cv2.resize(frame, (left_half.shape[1], left_half.shape[0]))
        left_half[:, :, :] = frame

        # Iterate over all detected bounding boxes
        # Before the loop, initialize variables that track the nearest and farthest depths
        min_depth_val = float('inf')
        max_depth_val = float('-inf')
        min_depth_point = None
        max_depth_point = None
        center_points = []
        for top, left, bottom, right in out_boxes:
            min_depth = float('inf')     # minimum depth, initialized to infinity
            max_depth = float('-inf')    # maximum depth, initialized to negative infinity
            min_depth_pixel = None       # pixel with the minimum depth
            max_depth_pixel = None       # pixel with the maximum depth
            center_x = (left + right) / 2

            # Run the left/right relation check after each bounding box is processed
            for i in range(1, len(out_boxes) + 1):          # from 1 up to len(out_boxes)
                for j in range(i + 1, len(out_boxes) + 1):  # from i+1 up to len(out_boxes)
                    center_x1 = (out_boxes[i - 1][1] + out_boxes[i - 1][3]) / 2
                    center_x2 = (out_boxes[j - 1][1] + out_boxes[j - 1][3]) / 2
                    # Threshold used to decide the left/right relation; tune it as needed
                    threshold = 20
                    if center_x1 < center_x2 - threshold:
                        relation = "left"
                    elif center_x1 > center_x2 + threshold:
                        relation = "right"
                    else:
                        relation = "overlap"
                    # Write the left/right relation onto the lower part of the right half
                    text = f"Bounding Box {i} is on the {relation} side of Bounding Box {j}"
                    line_height = 30   # height of each text line
                    y_position = 50 + ((i - 1) * (len(out_boxes)) + j - 1) * line_height
                    cv2.putText(bottom_half, text, (50, y_position), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)

            # Walk over every pixel inside the bounding box
            for y in range(int(top), int(bottom)):
                for x in range(int(left), int(right)):
                    depth_pixel = [x, y]
                    dis, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin)  # depth at this pixel
                    # Update the minimum and maximum depths
                    if dis > 0:
                        if dis < min_depth:
                            min_depth = dis
                            min_depth_pixel = depth_pixel
                        if dis > max_depth:
                            max_depth = dis
                            max_depth_pixel = depth_pixel
            if min_depth_pixel is not None:
                min_depth_val = min_depth
                min_depth_point = min_depth_pixel
            if max_depth_pixel is not None:
                max_depth_val = max_depth
                max_depth_point = max_depth_pixel
            # Compute the center point of the bounding box
            center_x = int((left + right) / 2)
            center_y = int((top + bottom) / 2)
            depth_pixel = [center_x, center_y]
            center_depth, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin)  # depth at the center point
            # Print the minimum, maximum and center depths
            print(f"Minimum depth at: {min_depth_pixel}, value: {min_depth}")
            print(f"Maximum depth at: {max_depth_pixel}, value: {max_depth}")
            print(f"Center depth at: {depth_pixel}, value: {center_depth}")
            # Mark the center point and the minimum / maximum depth points on the frame
            cv2.circle(frame, (center_x, center_y), 5, GREEN_COLOR, -1)
            cv2.circle(frame, (min_depth_point[0], min_depth_point[1]), 5, RED_COLOR, -1)
            cv2.circle(frame, (max_depth_point[0], max_depth_point[1]), 5, YELLOW_COLOR, -1)
            center_points.append((center_x, center_y, center_depth))

        # If at least two boxes were detected, compute the distance between the first two center points
        if len(out_boxes) >= 2:
            center_x1, center_y1, center_depth1 = center_points[0]
            center_x2, center_y2, center_depth2 = center_points[1]
            # Distance between the two center points (in pixels)
            distance_x = center_x2 - center_x1
            distance_y = center_y2 - center_y1
            print(f"Center points distance (x, y): ({distance_x}, {distance_y})")

        cv2.setMouseCallback('RealSense', mouse_callback)  # register the mouse callback that handles click events
        # Draw the FPS on the frame
        frame = cv2.putText(frame, "fps= %.2f"%(fps), (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        # Compute and update the FPS
        fps = ( fps + (1./(time.time()-t1)) ) / 2
        print("fps= %.2f"%(fps))            # print the FPS
        cv2.imshow('RealSense', canvas)     # show the composed canvas
        key = cv2.waitKey(1) & 0xff         # read the key pressed by the user
        # Compute and print the elapsed time
        elapsed_time = time.time() - start_time
        print("Time: %.2f seconds" % elapsed_time)
        # Break out of the loop when 'q' is pressed
        if key == ord('q'):
            break
        # Pause until another key is pressed when 's' is pressed
        if key == ord('s'):
            cv2.waitKey(0)
    pipeline.stop()
    cv2.destroyAllWindows()
```
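The nested per-pixel loop above calls `get_distance()` once for every pixel of every box, which is very slow at 640×480. Below is a minimal sketch, an assumption rather than part of the original code, of getting the same minimum/maximum/center depths in a single NumPy pass over the raw `img_depth` array; `box_depth_stats` and `depth_scale` are hypothetical names, with the depth scale read once from the started profile.

```python=
import numpy as np

# Hypothetical helper (not in the original): min / max / center depth of a box in meters,
# computed from the raw depth image instead of per-pixel get_distance() calls.
# Bounds clamping is omitted for brevity.
def box_depth_stats(img_depth, depth_scale, box):
    top, left, bottom, right = [int(v) for v in box]
    roi = img_depth[top:bottom, left:right].astype(np.float32) * depth_scale
    valid = roi[roi > 0]                      # zero means no depth reading at that pixel
    if valid.size == 0:
        return 0.0, 0.0, 0.0
    center_m = float(img_depth[(top + bottom) // 2, (left + right) // 2]) * depth_scale
    return float(valid.min()), float(valid.max()), center_m

# The depth scale could be read once after pipeline.start(config), e.g.:
# depth_scale = pipe_profile.get_device().first_depth_sensor().get_depth_scale()
# min_m, max_m, center_m = box_depth_stats(img_depth, depth_scale, (top, left, bottom, right))
```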
![](https://hackmd.io/_uploads/rk9myfPJa.png)
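The left/right test in the script above is repeated inline for every pair of boxes. Below is a minimal sketch, not from the original code, that isolates that rule in a small helper so it can be reused or tested on its own; `horizontal_relation` is a hypothetical name and the 20-pixel threshold mirrors the value used above.

```python=
# Hypothetical helper (not in the original): classify the horizontal relation between two
# boxes given as (top, left, bottom, right), mirroring the threshold rule used above.
def horizontal_relation(box_a, box_b, threshold=20):
    center_a = (box_a[1] + box_a[3]) / 2   # x center of box A
    center_b = (box_b[1] + box_b[3]) / 2   # x center of box B
    if center_a < center_b - threshold:
        return "left"
    if center_a > center_b + threshold:
        return "right"
    return "overlap"

# Example: print the relation for every pair of detected boxes once per frame
# for i in range(len(out_boxes)):
#     for j in range(i + 1, len(out_boxes)):
#         print(f"Box {i + 1} is on the {horizontal_relation(out_boxes[i], out_boxes[j])} side of Box {j + 1}")
```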