## RGB: reading a clicked pixel's color with OpenCV
```python=
import cv2
import numpy as np
cap = cv2.VideoCapture(0)
# 定義滑鼠點擊事件
def mouse_callback(event, x, y, flags, param):
if event == cv2.EVENT_LBUTTONDOWN:
# 輸出滑鼠點擊位置的像素座標
print("滑鼠點擊事件座標 (x, y):", x, y)
# 獲取指定位置的顏色RGB值
b, g, r = frame[y, x]
print("像素值 (BGR):", b, g, r)
# 創建視窗綁定滑鼠點擊事件範圍
cv2.namedWindow('frame', 0)
while True:
ret_,frame = cap.read()
# 滑鼠點擊事件
cv2.setMouseCallback('frame', mouse_callback)
# 顯示影像
cv2.imshow('frame', frame)
# 等待滑鼠點擊事件
key = cv2.waitKey(1) & 0xff
if key == ord('q'):
break
# 關閉視窗以及攝影機節省運作資源
cap.release()
cv2.destroyAllWindows()
```
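Note that OpenCV stores pixels in BGR order, which is why the callback above prints B, G, R. If you also want the clicked pixel reported as RGB or HSV, a minimal sketch (assuming the same `frame` captured in the loop) could look like this:
```python=
import cv2
import numpy as np

def print_pixel_colors(frame, x, y):
    # OpenCV images are BGR, so reverse the order for an RGB reading
    b, g, r = frame[y, x]
    print("RGB:", r, g, b)
    # convert just this one pixel to HSV by wrapping it in a 1x1 image
    hsv = cv2.cvtColor(np.uint8([[[b, g, r]]]), cv2.COLOR_BGR2HSV)[0][0]
    print("HSV:", hsv[0], hsv[1], hsv[2])
```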
## Next: RealSense itself — click the window to get the information
> Once you know how to get image coordinates,
> you can try bringing RealSense into the mix
> and read both the image coordinate and the depth of a chosen point.
> No more talk, straight to the code.
> [color=#DA0000]
```python=
import pyrealsense2 as rs
import numpy as np
import cv2
pipeline = rs.pipeline()
config = rs.config()
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 15)
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 15)
pipe_profile = pipeline.start(config)
align_to = rs.stream.color
align = rs.align(align_to)
cv2.namedWindow('RealSense', 0)
def get_aligned_images():
frames = pipeline.wait_for_frames()
aligned_frames = align.process(frames)
aligned_depth_frame = aligned_frames.get_depth_frame()
aligned_color_frame = aligned_frames.get_color_frame()
depth_intrin = aligned_depth_frame.profile.as_video_stream_profile().intrinsics
color_intrin = aligned_color_frame.profile.as_video_stream_profile().intrinsics
img_color = np.asanyarray(aligned_color_frame.get_data())
img_depth = np.asanyarray(aligned_depth_frame.get_data())
return color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame
def get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin):
x = depth_pixel[0]
y = depth_pixel[1]
dis = aligned_depth_frame.get_distance(x, y)
camera_coordinate = rs.rs2_deproject_pixel_to_point(depth_intrin, depth_pixel, dis)
return dis, camera_coordinate
def mouse_callback(event, x, y, flags, param):
if event == cv2.EVENT_LBUTTONDOWN:
# 輸出滑鼠點擊位置的像素座標
print("滑鼠點擊事件座標 (x, y):", x, y)
depth_pixel = [x, y]
dis, camera_coordinate = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin)
print('深度: ', dis) # 深度單位是m
#print('camera_coordinate: ', camera_coordinate)
if __name__ == "__main__":
while True:
color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame = get_aligned_images()
cv2.setMouseCallback('RealSense', mouse_callback)
cv2.imshow('RealSense', img_color)
key = cv2.waitKey(1) & 0xff
if key == ord('q'):
break
pipeline.stop()
cv2.destroyAllWindows()
```
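`get_distance()` and `rs2_deproject_pixel_to_point()` both work in meters, so `dis` is the depth in meters and `camera_coordinate` is an `[x, y, z]` point in the camera frame, also in meters. A small sketch of formatting those values, assuming the same `get_3d_camera_coordinate` helper defined above:
```python=
def describe_point(depth_pixel, aligned_depth_frame, depth_intrin):
    # dis is the depth in meters; cam is the [x, y, z] camera-frame coordinate in meters
    dis, cam = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin)
    x_mm, y_mm, z_mm = (v * 1000.0 for v in cam)
    print("depth: %.3f m, camera XYZ: (%.1f, %.1f, %.1f) mm" % (dis, x_mm, y_mm, z_mm))
```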
## ==YOLO with RealSense: click the window to get the depth value==
```python=
#匯入必要的函式庫
import pyrealsense2 as rs
import numpy as np
import cv2
import time
import tensorflow as tf
from PIL import Image
from yolo import YOLO, YOLO_ONNX
#獲取系統上所有GPU裝置
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
#為每個GPU啟用記憶體增長
#可以避免Tensorflow一開始就使用所有的GPU記憶體
#而是按照需增加GPU記憶體使用量
tf.config.experimental.set_memory_growth(gpu, True)
#創建並啟動realsense pipeline
pipeline = rs.pipeline()
config = rs.config()
#運用深度及彩色串流
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 15)
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 15)
#啟動pipline
pipe_profile = pipeline.start(config)
#指定對齊目標為彩色串流
align_to = rs.stream.color
#將深度串流對齊至彩色串流
align = rs.align(align_to)
#創見一個視窗名為RealSense
cv2.namedWindow('RealSence',0)
def get_aligned_images():
frames = pipeline.wait_for_frames()#從realsense piplin獲取一組新的畫面
aligned_frames = align.process(frames)#將深度影像對齊至彩色影像,並返回一組新的已對齊畫面
aligned_depth_frame = aligned_frames.get_depth_frame()#從已對齊畫面取得深度畫面
aligned_color_frame = aligned_frames.get_color_frame()#從已對齊畫面取得彩色畫面
depth_intrin = aligned_depth_frame.profile.as_video_stream_profile().intrinsics#獲取深度畫面的內部參數,如:主座標
color_intrin = aligned_color_frame.profile.as_video_stream_profile().intrinsics#獲取彩色畫面的內部參數
img_color = np.asanyarray(aligned_color_frame.get_data())#將彩色畫面轉換Numpy陣列
img_depth = np.asanyarray(aligned_depth_frame.get_data())#將深度畫面轉換為Numpy陣列
return color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame#返回內部參數,彩色圖像,深度圖像及深度畫面
def get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin):
#將輸入的深度影像座標分解為x,y部份
x = depth_pixel[0]
y = depth_pixel[1]
#運用rs SDK get_distance方式取得指定像素深度值
dis = aligned_depth_frame.get_distance(x, y)
camera_coordinate = rs.rs2_deproject_pixel_to_point(depth_intrin, depth_pixel, dis)#運用rs SDK rs2_dep....函數將2D像素座標轉換到3D相機座標系中的點
return dis, camera_coordinate
def mouse_callback(event, x, y, flags, param):
if event == cv2.EVENT_LBUTTONDOWN:
print("滑鼠點擊事件座標 (x, y):", x, y)
depth_pixel = [x, y]
dis, camera_coordinate = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin)
print ('深度: ',dis)
#print ('camera_coordinate: ',camera_coordinate)
#初始化幀率及YOLO模型
fps = 0.0
yolo = YOLO()
#主迴圈
if __name__=="__main__":
while True:
t1 = time.time()#紀錄當前時間
color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame = get_aligned_images()# 呼叫 get_aligned_images() 函數並將回傳的結果分別賦值給變數 color_intrin、depth_intrin、img_color、img_depth、aligned_depth_frame
frame = cv2.cvtColor(img_color,cv2.COLOR_BGR2RGB)#將輸入BGR圖像轉成RGB圖像
frame = Image.fromarray(np.uint8(frame))#將numpy陣列轉成PIL圖像
frame = np.array(yolo.detect_image(frame))#用YOLO模型進行物件檢測並將結果轉換回 NumPy 陣列
frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR)#將RGB圖像轉換回BGR格式
cv2.setMouseCallback('RealSence', mouse_callback)
frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)# 在圖像上繪製每秒幀數
# 計算每秒幀數及打印出來
fps = ( fps + (1./(time.time()-t1)) ) / 2
print("fps= %.2f"%(fps))
#顯示圖像及等待並獲取用戶輸入
cv2.imshow('RealSence',frame)
key = cv2.waitKey(1)&0xff
#如果按下q會跳出主迴圈,按下s則暫停畫面更新
if key == ord('q'):
break
if key == ord('s'):
cv2.waitKey(0)
pipeline.stop()
cv2.destroyAllWindows()
```
{%youtube h1T7rwze-Pc%}
## ==Showing the bounding-box center depth by modifying def detect_image==
```python=
def detect_image(self, image, depth_image,crop = False, count = False):
#---------------------------------------------------------#
# 在这里将图像转换成RGB图像,防止灰度图在预测时报错。
# 代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB
#---------------------------------------------------------#
image = cvtColor(image)
#---------------------------------------------------------#
# 给图像增加灰条,实现不失真的resize
# 也可以直接resize进行识别
#---------------------------------------------------------#
image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
#---------------------------------------------------------#
# 添加上batch_size维度,并进行归一化
#---------------------------------------------------------#
image_data = np.expand_dims(preprocess_input(np.array(image_data, dtype='float32')), 0)
#---------------------------------------------------------#
# 将图像输入网络当中进行预测!
#---------------------------------------------------------#
input_image_shape = np.expand_dims(np.array([image.size[1], image.size[0]], dtype='float32'), 0)
out_boxes, out_scores, out_classes = self.get_pred(image_data, input_image_shape)
print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
#---------------------------------------------------------#
# 设置字体与边框厚度
#---------------------------------------------------------#
font = ImageFont.truetype(font='model_data/simhei.ttf', size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
thickness = int(max((image.size[0] + image.size[1]) // np.mean(self.input_shape), 1))
#---------------------------------------------------------#
# 计数
#---------------------------------------------------------#
if count:
print("top_label:", out_classes)
classes_nums = np.zeros([self.num_classes])
for i in range(self.num_classes):
num = np.sum(out_classes == i)
if num > 0:
print(self.class_names[i], " : ", num)
classes_nums[i] = num
print("classes_nums:", classes_nums)
#---------------------------------------------------------#
# 是否进行目标的裁剪
#---------------------------------------------------------#
if crop:
for i, c in list(enumerate(out_boxes)):
top, left, bottom, right = out_boxes[i]
top = max(0, np.floor(top).astype('int32'))
left = max(0, np.floor(left).astype('int32'))
bottom = min(image.size[1], np.floor(bottom).astype('int32'))
right = min(image.size[0], np.floor(right).astype('int32'))
dir_save_path = "img_crop"
if not os.path.exists(dir_save_path):
os.makedirs(dir_save_path)
crop_image = image.crop([left, top, right, bottom])
crop_image.save(os.path.join(dir_save_path, "crop_" + str(i) + ".png"), quality=95, subsampling=0)
print("save crop_" + str(i) + ".png to " + dir_save_path)
#---------------------------------------------------------#
# 图像绘制
#---------------------------------------------------------#
depth_image = np.array(depth_image)#將深度影像轉成numpy數組
for i, c in list(enumerate(out_classes)):
predicted_class = self.class_names[int(c)]
box = out_boxes[i]
score = out_scores[i]
top, left, bottom, right = box
top = max(0, np.floor(top).astype('int32'))
left = max(0, np.floor(left).astype('int32'))
bottom = min(image.size[1], np.floor(bottom).astype('int32'))
right = min(image.size[0], np.floor(right).astype('int32'))
#計算bounding box中心點
center_x = int((left + right) / 2)
center_y = int((top + bottom) / 2)
#取得邊界框中心點在深度圖像中的深度值
depth_at_center = depth_image[center_y, center_x]
#將結果打印出來
print(f'Depth at center of bounding box: {depth_at_center} mm')
label = '{} {:.2f}'.format(predicted_class, score)
draw = ImageDraw.Draw(image)
label_size = draw.textsize(label, font)
label = label.encode('utf-8')
print(label, top, left, bottom, right)
if top - label_size[1] >= 0:
text_origin = np.array([left, top - label_size[1]])
else:
text_origin = np.array([left, top + 1])
for i in range(thickness):
draw.rectangle([left + i, top + i, right - i, bottom - i], outline=self.colors[c])
draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=self.colors[c])
draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font=font)
del draw
return image
```
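One caveat: `depth_image[center_y, center_x]` is the raw 16-bit value in device depth units, not millimeters. On most RealSense models one unit is 1 mm (depth scale 0.001 m), but it is safer to query the scale from the device. A hedged sketch, assuming `pipe_profile` is the object returned by `pipeline.start(config)` in the main script:
```python=
# query the depth scale once after starting the pipeline (assumes pipe_profile is available)
depth_sensor = pipe_profile.get_device().first_depth_sensor()
depth_scale = depth_sensor.get_depth_scale()      # meters per depth unit, typically 0.001

raw_value = depth_image[center_y, center_x]       # raw uint16 reading
depth_mm = raw_value * depth_scale * 1000.0       # convert to millimeters
print(f'Depth at center of bounding box: {depth_mm:.1f} mm')
```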
### ==Main program for the modified detect_image (with timing added)==
```python=
#匯入必要的函式庫
import pyrealsense2 as rs
import numpy as np
import cv2
import time
import tensorflow as tf
from PIL import Image
from yolo import YOLO, YOLO_ONNX
#獲取系統上所有GPU裝置
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
#為每個GPU啟用記憶體增長
#可以避免Tensorflow一開始就使用所有的GPU記憶體
#而是按照需增加GPU記憶體使用量
tf.config.experimental.set_memory_growth(gpu, True)
#創建並啟動realsense pipeline
pipeline = rs.pipeline()
config = rs.config()
#運用深度及彩色串流
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 15)
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 15)
#啟動pipline
pipe_profile = pipeline.start(config)
#指定對齊目標為彩色串流
align_to = rs.stream.color
#將深度串流對齊至彩色串流
align = rs.align(align_to)
#創見一個視窗名為RealSense
cv2.namedWindow('RealSence',0)
def get_aligned_images():
frames = pipeline.wait_for_frames()#從realsense piplin獲取一組新的畫面
aligned_frames = align.process(frames)#將深度影像對齊至彩色影像,並返回一組新的已對齊畫面
aligned_depth_frame = aligned_frames.get_depth_frame()#從已對齊畫面取得深度畫面
aligned_color_frame = aligned_frames.get_color_frame()#從已對齊畫面取得彩色畫面
depth_intrin = aligned_depth_frame.profile.as_video_stream_profile().intrinsics#獲取深度畫面的內部參數,如:主座標
color_intrin = aligned_color_frame.profile.as_video_stream_profile().intrinsics#獲取彩色畫面的內部參數
img_color = np.asanyarray(aligned_color_frame.get_data())#將彩色畫面轉換Numpy陣列
img_depth = np.asanyarray(aligned_depth_frame.get_data())#將深度畫面轉換為Numpy陣列
return color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame#返回內部參數,彩色圖像,深度圖像及深度畫面
def get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin):
#將輸入的深度影像座標分解為x,y部份
x = depth_pixel[0]
y = depth_pixel[1]
#運用rs SDK get_distance方式取得指定像素深度值
dis = aligned_depth_frame.get_distance(x, y)
camera_coordinate = rs.rs2_deproject_pixel_to_point(depth_intrin, depth_pixel, dis)#運用rs SDK rs2_dep....函數將2D像素座標轉換到3D相機座標系中的點
return dis, camera_coordinate
def mouse_callback(event, x, y, flags, param):
if event == cv2.EVENT_LBUTTONDOWN:
print("滑鼠點擊事件座標 (x, y):", x, y)
depth_pixel = [x, y]
dis, camera_coordinate = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin)
print ('深度: ',dis)
#print ('camera_coordinate: ',camera_coordinate)
#初始化幀率及YOLO模型
fps = 0.0
yolo = YOLO()
#主迴圈
if __name__=="__main__":
while True:
#計時開始
start_time = time.time()
t1 = time.time()#紀錄當前時間
color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame = get_aligned_images()# 呼叫 get_aligned_images() 函數並將回傳的結果分別賦值給變數 color_intrin、depth_intrin、img_color、img_depth、aligned_depth_frame
frame = cv2.cvtColor(img_color,cv2.COLOR_BGR2RGB)#將輸入BGR圖像轉成RGB圖像
frame = Image.fromarray(np.uint8(frame))#將numpy陣列轉成PIL圖像
#用YOLO模型進行物件檢測並將結果轉換回 NumPy 陣列
# 增加了 img_depth 参数
frame = np.array(yolo.detect_image(frame, img_depth))
frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR)#將RGB圖像轉換回BGR格式
cv2.setMouseCallback('RealSence', mouse_callback)
frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)# 在圖像上繪製每秒幀數
# 計算每秒幀數及打印出來
fps = ( fps + (1./(time.time()-t1)) ) / 2
print("fps= %.2f"%(fps))
#顯示圖像及等待並獲取用戶輸入
cv2.imshow('RealSence',frame)
key = cv2.waitKey(1)&0xff
#計時停止
elapsed_time = time.time() - start_time
print("Time: %.2f seconds" % elapsed_time)
#如果按下q會跳出主迴圈,按下s則暫停畫面更新
if key == ord('q'):
break
if key == ord('s'):
cv2.waitKey(0)
pipeline.stop()
cv2.destroyAllWindows()
```
{%youtube qe2cwuZ9Obc%}
## ==Returning the box coordinates from def detect_image and handling the depth in the main program==
```python=
def detect_image(self, image, crop = False, count = False):
#---------------------------------------------------------#
# 在这里将图像转换成RGB图像,防止灰度图在预测时报错。
# 代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB
#---------------------------------------------------------#
image = cvtColor(image)
#---------------------------------------------------------#
# 给图像增加灰条,实现不失真的resize
# 也可以直接resize进行识别
#---------------------------------------------------------#
image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
#---------------------------------------------------------#
# 添加上batch_size维度,并进行归一化
#---------------------------------------------------------#
image_data = np.expand_dims(preprocess_input(np.array(image_data, dtype='float32')), 0)
#---------------------------------------------------------#
# 将图像输入网络当中进行预测!
#---------------------------------------------------------#
input_image_shape = np.expand_dims(np.array([image.size[1], image.size[0]], dtype='float32'), 0)
out_boxes, out_scores, out_classes = self.get_pred(image_data, input_image_shape)
print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
#---------------------------------------------------------#
# 设置字体与边框厚度
#---------------------------------------------------------#
font = ImageFont.truetype(font='model_data/simhei.ttf', size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
thickness = int(max((image.size[0] + image.size[1]) // np.mean(self.input_shape), 1))
#---------------------------------------------------------#
# 计数
#---------------------------------------------------------#
if count:
print("top_label:", out_classes)
classes_nums = np.zeros([self.num_classes])
for i in range(self.num_classes):
num = np.sum(out_classes == i)
if num > 0:
print(self.class_names[i], " : ", num)
classes_nums[i] = num
print("classes_nums:", classes_nums)
#---------------------------------------------------------#
# 是否进行目标的裁剪
#---------------------------------------------------------#
if crop:
for i, c in list(enumerate(out_boxes)):
top, left, bottom, right = out_boxes[i]
top = max(0, np.floor(top).astype('int32'))
left = max(0, np.floor(left).astype('int32'))
bottom = min(image.size[1], np.floor(bottom).astype('int32'))
right = min(image.size[0], np.floor(right).astype('int32'))
dir_save_path = "img_crop"
if not os.path.exists(dir_save_path):
os.makedirs(dir_save_path)
crop_image = image.crop([left, top, right, bottom])
crop_image.save(os.path.join(dir_save_path, "crop_" + str(i) + ".png"), quality=95, subsampling=0)
print("save crop_" + str(i) + ".png to " + dir_save_path)
#---------------------------------------------------------#
# 图像绘制
#---------------------------------------------------------#
for i, c in list(enumerate(out_classes)):
predicted_class = self.class_names[int(c)]
box = out_boxes[i]
score = out_scores[i]
top, left, bottom, right = box
top = max(0, np.floor(top).astype('int32'))
left = max(0, np.floor(left).astype('int32'))
bottom = min(image.size[1], np.floor(bottom).astype('int32'))
right = min(image.size[0], np.floor(right).astype('int32'))
label = '{} {:.2f}'.format(predicted_class, score)
draw = ImageDraw.Draw(image)
label_size = draw.textsize(label, font)
label = label.encode('utf-8')
print(label, top, left, bottom, right)
if top - label_size[1] >= 0:
text_origin = np.array([left, top - label_size[1]])
else:
text_origin = np.array([left, top + 1])
for i in range(thickness):
draw.rectangle([left + i, top + i, right - i, bottom - i], outline=self.colors[c])
draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=self.colors[c])
draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font=font)
del draw
return image, out_boxes
```
### ==predict script==
```python=
#匯入必要的函式庫
import pyrealsense2 as rs
import numpy as np
import cv2
import time
import tensorflow as tf
from PIL import Image
from yolo import YOLO, YOLO_ONNX
#獲取系統上所有GPU裝置
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
#為每個GPU啟用記憶體增長
#可以避免Tensorflow一開始就使用所有的GPU記憶體
#而是按照需增加GPU記憶體使用量
tf.config.experimental.set_memory_growth(gpu, True)
#創建並啟動realsense pipeline
pipeline = rs.pipeline()
config = rs.config()
#運用深度及彩色串流
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 15)
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 15)
#啟動pipline
pipe_profile = pipeline.start(config)
#指定對齊目標為彩色串流
align_to = rs.stream.color
#將深度串流對齊至彩色串流
align = rs.align(align_to)
#創見一個視窗名為RealSense
cv2.namedWindow('RealSence',0)
def get_aligned_images():
frames = pipeline.wait_for_frames()#從realsense piplin獲取一組新的畫面
aligned_frames = align.process(frames)#將深度影像對齊至彩色影像,並返回一組新的已對齊畫面
aligned_depth_frame = aligned_frames.get_depth_frame()#從已對齊畫面取得深度畫面
aligned_color_frame = aligned_frames.get_color_frame()#從已對齊畫面取得彩色畫面
depth_intrin = aligned_depth_frame.profile.as_video_stream_profile().intrinsics#獲取深度畫面的內部參數,如:主座標
color_intrin = aligned_color_frame.profile.as_video_stream_profile().intrinsics#獲取彩色畫面的內部參數
img_color = np.asanyarray(aligned_color_frame.get_data())#將彩色畫面轉換Numpy陣列
img_depth = np.asanyarray(aligned_depth_frame.get_data())#將深度畫面轉換為Numpy陣列
return color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame#返回內部參數,彩色圖像,深度圖像及深度畫面
def get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin):
#將輸入的深度影像座標分解為x,y部份
x = depth_pixel[0]
y = depth_pixel[1]
#運用rs SDK get_distance方式取得指定像素深度值
dis = aligned_depth_frame.get_distance(x, y)
camera_coordinate = rs.rs2_deproject_pixel_to_point(depth_intrin, depth_pixel, dis)#運用rs SDK rs2_dep....函數將2D像素座標轉換到3D相機座標系中的點
return dis, camera_coordinate
def mouse_callback(event, x, y, flags, param):
if event == cv2.EVENT_LBUTTONDOWN:
print("滑鼠點擊事件座標 (x, y):", x, y)
depth_pixel = [x, y]
dis, camera_coordinate = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin)
print ('深度: ',dis)
#print ('camera_coordinate: ',camera_coordinate)
#初始化幀率及YOLO模型
fps = 0.0
yolo = YOLO()
#主迴圈
if __name__=="__main__":
while True:
start_time = time.time()
t1 = time.time()#紀錄當前時間
color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame = get_aligned_images()# 呼叫 get_aligned_images() 函數並將回傳的結果分別賦值給變數 color_intrin、depth_intrin、img_color、img_depth、aligned_depth_frame
frame = cv2.cvtColor(img_color,cv2.COLOR_BGR2RGB)#將輸入BGR圖像轉成RGB圖像
frame = Image.fromarray(np.uint8(frame))#將numpy陣列轉成PIL圖像
image, out_boxes = yolo.detect_image(frame)
frame = np.array(image)#用YOLO模型進行物件檢測並將結果轉換回 NumPy 陣列
frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR)#將RGB圖像轉換回BGR格式
for top, left, bottom, right in out_boxes: # 這裡假設每個 box 的格式是 (top, left, bottom, right)
center_x = int((left + right) / 2)
center_y = int((top + bottom) / 2)
depth_pixel = [center_x, center_y]
dis, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin)
print(f"Depth at center of bounding box: {dis}")
cv2.setMouseCallback('RealSence', mouse_callback)
frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)# 在圖像上繪製每秒幀數
# 計算每秒幀數及打印出來
fps = ( fps + (1./(time.time()-t1)) ) / 2
print("fps= %.2f"%(fps))
#顯示圖像及等待並獲取用戶輸入
cv2.imshow('RealSence',frame)
key = cv2.waitKey(1)&0xff
elapsed_time = time.time() - start_time
print("Time: %.2f seconds" % elapsed_time)
#如果按下q會跳出主迴圈,按下s則暫停畫面更新
if key == ord('q'):
break
if key == ord('s'):
cv2.waitKey(0)
pipeline.stop()
cv2.destroyAllWindows()
```
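Here the center depth comes from `get_distance()`, so `dis` is already in meters, but it is only printed to the console. A small sketch of overlaying it next to each box instead, reusing `frame`, `dis`, `center_x`, and `center_y` from the loop above:
```python=
# inside the per-box loop, after dis and the center point are known
label = "%.2f m" % dis
cv2.putText(frame, label, (center_x, center_y - 10),
            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
cv2.circle(frame, (center_x, center_y), 4, (0, 255, 0), -1)
```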
## Extending the center-depth program to also find the min and max depth inside the bounding box (predict)
```python=
#匯入必要的函式庫
import pyrealsense2 as rs
import numpy as np
import cv2
import time
import tensorflow as tf
from PIL import Image
from yolo import YOLO, YOLO_ONNX
#獲取系統上所有GPU裝置
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
#為每個GPU啟用記憶體增長
#可以避免Tensorflow一開始就使用所有的GPU記憶體
#而是按照需增加GPU記憶體使用量
tf.config.experimental.set_memory_growth(gpu, True)
#創建並啟動realsense pipeline
pipeline = rs.pipeline()
config = rs.config()
#運用深度及彩色串流
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 15)
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 15)
#啟動pipline
pipe_profile = pipeline.start(config)
#指定對齊目標為彩色串流
align_to = rs.stream.color
#將深度串流對齊至彩色串流
align = rs.align(align_to)
#創見一個視窗名為RealSense
cv2.namedWindow('RealSence',0)
def get_aligned_images():
frames = pipeline.wait_for_frames()#從realsense piplin獲取一組新的畫面
aligned_frames = align.process(frames)#將深度影像對齊至彩色影像,並返回一組新的已對齊畫面
aligned_depth_frame = aligned_frames.get_depth_frame()#從已對齊畫面取得深度畫面
aligned_color_frame = aligned_frames.get_color_frame()#從已對齊畫面取得彩色畫面
depth_intrin = aligned_depth_frame.profile.as_video_stream_profile().intrinsics#獲取深度畫面的內部參數,如:主座標
color_intrin = aligned_color_frame.profile.as_video_stream_profile().intrinsics#獲取彩色畫面的內部參數
img_color = np.asanyarray(aligned_color_frame.get_data())#將彩色畫面轉換Numpy陣列
img_depth = np.asanyarray(aligned_depth_frame.get_data())#將深度畫面轉換為Numpy陣列
return color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame#返回內部參數,彩色圖像,深度圖像及深度畫面
def get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin):
#將輸入的深度影像座標分解為x,y部份
x = depth_pixel[0]
y = depth_pixel[1]
#運用rs SDK get_distance方式取得指定像素深度值
dis = aligned_depth_frame.get_distance(x, y)
camera_coordinate = rs.rs2_deproject_pixel_to_point(depth_intrin, depth_pixel, dis)#運用rs SDK rs2_dep....函數將2D像素座標轉換到3D相機座標系中的點
return dis, camera_coordinate
def mouse_callback(event, x, y, flags, param):
if event == cv2.EVENT_LBUTTONDOWN:
print("滑鼠點擊事件座標 (x, y):", x, y)
depth_pixel = [x, y]
dis, camera_coordinate = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin)
print ('深度: ',dis)
#print ('camera_coordinate: ',camera_coordinate)
#初始化幀率及YOLO模型
fps = 0.0
yolo = YOLO()
#主迴圈
if __name__=="__main__": # 確定此程式作為主程式執行
while True: # 持續執行直到使用者退出
start_time = time.time() # 紀錄程式開始的時間
t1 = time.time() # 用來計算 FPS 的時間標記
color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame = get_aligned_images() # 獲取對齊的影像和內部參數
frame = cv2.cvtColor(img_color,cv2.COLOR_BGR2RGB) # 將 BGR 圖像轉換成 RGB
frame = Image.fromarray(np.uint8(frame)) # 將 numpy 陣列轉換成 Image 物件
image, out_boxes = yolo.detect_image(frame) # 使用 YOLO 偵測物件,回傳影像和邊界框
frame = np.array(image) # 將 Image 物件轉換回 numpy 陣列
frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR) # 將 RGB 圖像轉換回 BGR
# 遍歷所有偵測到的邊界框
for top, left, bottom, right in out_boxes:
min_depth = float('inf') # 初始化最小深度為無窮大
max_depth = float('-inf') # 初始化最大深度為無窮小
min_depth_pixel = None # 初始化最小深度的像素點為 None
max_depth_pixel = None # 初始化最大深度的像素點為 None
# 遍歷邊界框內的所有像素
for y in range(int(top), int(bottom)):
for x in range(int(left), int(right)):
depth_pixel = [x, y] # 建立像素座標
dis, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) # 獲取像素點的深度
# 更新最小和最大深度
if dis < min_depth:
min_depth = dis
min_depth_pixel = depth_pixel
if dis > max_depth:
max_depth = dis
max_depth_pixel = depth_pixel
# 計算邊界框的中心點座標
center_x = int((left + right) / 2)
center_y = int((top + bottom) / 2)
depth_pixel = [center_x, center_y]
center_depth, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) # 獲取中心點的深度
# 印出最小、最大和中心深度的資訊
print(f"Minimum depth at: {min_depth_pixel}, value: {min_depth}")
print(f"Maximum depth at: {max_depth_pixel}, value: {max_depth}")
print(f"Center depth at: {depth_pixel}, value: {center_depth}")
cv2.setMouseCallback('RealSence', mouse_callback) # 設定滑鼠回調函數,用來處理滑鼠事件
# 在畫面上顯示 FPS
frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
# 計算並更新 FPS
fps = ( fps + (1./(time.time()-t1)) ) / 2
print("fps= %.2f"%(fps)) # 印出 FPS
cv2.imshow('RealSence',frame) # 顯示處理後的畫面
key = cv2.waitKey(1)&0xff # 讀取使用者按下的按鍵
# 計算並印出執行時間
elapsed_time = time.time() - start_time
print("Time: %.2f seconds" % elapsed_time)
# 如果使用者按下 'q',則跳出迴圈
if key == ord('q'):
break
# 如果使用者按下 's',則等待使用者再次按下按鍵
if key == ord('s'):
cv2.waitKey(0)
pipeline.stop()
cv2.destroyAllWindows()
```
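Scanning every pixel of every box with `get_distance()` is the main reason this version is slow: it makes hundreds of thousands of Python-level SDK calls per frame. Since `img_depth` already holds the aligned depth image as a NumPy array, the same min/max search can be vectorized; a sketch of the per-box computation, assuming the depth scale has been read from the device as in the earlier sketch (`depth_scale`):
```python=
# vectorized min/max depth inside one box (img_depth is the aligned uint16 depth image)
roi = img_depth[int(top):int(bottom), int(left):int(right)].astype(np.float32) * depth_scale
valid = roi > 0                                    # zero means no depth reading
if valid.any():
    ys, xs = np.where(valid)
    values = roi[ys, xs]
    i_min, i_max = np.argmin(values), np.argmax(values)
    min_depth, max_depth = float(values[i_min]), float(values[i_max])
    min_depth_pixel = [int(left) + int(xs[i_min]), int(top) + int(ys[i_min])]
    max_depth_pixel = [int(left) + int(xs[i_max]), int(top) + int(ys[i_max])]
```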
### def detect_image code
``` python=
def detect_image(self, image, crop = False, count = False):
#---------------------------------------------------------#
# 在这里将图像转换成RGB图像,防止灰度图在预测时报错。
# 代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB
#---------------------------------------------------------#
image = cvtColor(image)
#---------------------------------------------------------#
# 给图像增加灰条,实现不失真的resize
# 也可以直接resize进行识别
#---------------------------------------------------------#
image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
#---------------------------------------------------------#
# 添加上batch_size维度,并进行归一化
#---------------------------------------------------------#
image_data = np.expand_dims(preprocess_input(np.array(image_data, dtype='float32')), 0)
#---------------------------------------------------------#
# 将图像输入网络当中进行预测!
#---------------------------------------------------------#
input_image_shape = np.expand_dims(np.array([image.size[1], image.size[0]], dtype='float32'), 0)
out_boxes, out_scores, out_classes = self.get_pred(image_data, input_image_shape)
print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
#---------------------------------------------------------#
# 设置字体与边框厚度
#---------------------------------------------------------#
font = ImageFont.truetype(font='model_data/simhei.ttf', size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
thickness = int(max((image.size[0] + image.size[1]) // np.mean(self.input_shape), 1))
#---------------------------------------------------------#
# 计数
#---------------------------------------------------------#
if count:
print("top_label:", out_classes)
classes_nums = np.zeros([self.num_classes])
for i in range(self.num_classes):
num = np.sum(out_classes == i)
if num > 0:
print(self.class_names[i], " : ", num)
classes_nums[i] = num
print("classes_nums:", classes_nums)
#---------------------------------------------------------#
# 是否进行目标的裁剪
#---------------------------------------------------------#
if crop:
for i, c in list(enumerate(out_boxes)):
top, left, bottom, right = out_boxes[i]
top = max(0, np.floor(top).astype('int32'))
left = max(0, np.floor(left).astype('int32'))
bottom = min(image.size[1], np.floor(bottom).astype('int32'))
right = min(image.size[0], np.floor(right).astype('int32'))
dir_save_path = "img_crop"
if not os.path.exists(dir_save_path):
os.makedirs(dir_save_path)
crop_image = image.crop([left, top, right, bottom])
crop_image.save(os.path.join(dir_save_path, "crop_" + str(i) + ".png"), quality=95, subsampling=0)
print("save crop_" + str(i) + ".png to " + dir_save_path)
#---------------------------------------------------------#
# 图像绘制
#---------------------------------------------------------#
for i, c in list(enumerate(out_classes)):
predicted_class = self.class_names[int(c)]
box = out_boxes[i]
score = out_scores[i]
top, left, bottom, right = box
top = max(0, np.floor(top).astype('int32'))
left = max(0, np.floor(left).astype('int32'))
bottom = min(image.size[1], np.floor(bottom).astype('int32'))
right = min(image.size[0], np.floor(right).astype('int32'))
label = '{} {:.2f}'.format(predicted_class, score)
draw = ImageDraw.Draw(image)
label_size = draw.textsize(label, font)
label = label.encode('utf-8')
print(label, top, left, bottom, right)
if top - label_size[1] >= 0:
text_origin = np.array([left, top - label_size[1]])
else:
text_origin = np.array([left, top + 1])
for i in range(thickness):
draw.rectangle([left + i, top + i, right - i, bottom - i], outline=self.colors[c])
draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=self.colors[c])
draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font=font)
del draw
return image, out_boxes
```
## Relative position between two bounding boxes ((x, y) coordinates of the nearest and farthest points)
``` python=
#匯入必要的函式庫
import pyrealsense2 as rs
import numpy as np
import cv2
import time
import tensorflow as tf
from PIL import Image
from yolo import YOLO, YOLO_ONNX
#獲取系統上所有GPU裝置
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
#為每個GPU啟用記憶體增長
#可以避免Tensorflow一開始就使用所有的GPU記憶體
#而是按照需增加GPU記憶體使用量
tf.config.experimental.set_memory_growth(gpu, True)
#創建並啟動realsense pipeline
pipeline = rs.pipeline()
config = rs.config()
#運用深度及彩色串流
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 15)
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 15)
#啟動pipline
pipe_profile = pipeline.start(config)
#指定對齊目標為彩色串流
align_to = rs.stream.color
#將深度串流對齊至彩色串流
align = rs.align(align_to)
#創見一個視窗名為RealSense
cv2.namedWindow('RealSence',0)
def get_aligned_images():
frames = pipeline.wait_for_frames()#從realsense piplin獲取一組新的畫面
aligned_frames = align.process(frames)#將深度影像對齊至彩色影像,並返回一組新的已對齊畫面
aligned_depth_frame = aligned_frames.get_depth_frame()#從已對齊畫面取得深度畫面
aligned_color_frame = aligned_frames.get_color_frame()#從已對齊畫面取得彩色畫面
depth_intrin = aligned_depth_frame.profile.as_video_stream_profile().intrinsics#獲取深度畫面的內部參數,如:主座標
color_intrin = aligned_color_frame.profile.as_video_stream_profile().intrinsics#獲取彩色畫面的內部參數
img_color = np.asanyarray(aligned_color_frame.get_data())#將彩色畫面轉換Numpy陣列
img_depth = np.asanyarray(aligned_depth_frame.get_data())#將深度畫面轉換為Numpy陣列
return color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame#返回內部參數,彩色圖像,深度圖像及深度畫面
def get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin):
#將輸入的深度影像座標分解為x,y部份
x = depth_pixel[0]
y = depth_pixel[1]
#運用rs SDK get_distance方式取得指定像素深度值
dis = aligned_depth_frame.get_distance(x, y)
camera_coordinate = rs.rs2_deproject_pixel_to_point(depth_intrin, depth_pixel, dis)#運用rs SDK rs2_dep....函數將2D像素座標轉換到3D相機座標系中的點
return dis, camera_coordinate
def mouse_callback(event, x, y, flags, param):
if event == cv2.EVENT_LBUTTONDOWN:
print("滑鼠點擊事件座標 (x, y):", x, y)
depth_pixel = [x, y]
dis, camera_coordinate = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin)
print ('深度: ',dis)
#print ('camera_coordinate: ',camera_coordinate)
#初始化幀率及YOLO模型
fps = 0.0
yolo = YOLO()
#主迴圈
if __name__=="__main__": # 確定此程式作為主程式執行
while True: # 持續執行直到使用者退出
start_time = time.time() # 紀錄程式開始的時間
t1 = time.time() # 用來計算 FPS 的時間標記
color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame = get_aligned_images() # 獲取對齊的影像和內部參數
frame = cv2.cvtColor(img_color,cv2.COLOR_BGR2RGB) # 將 BGR 圖像轉換成 RGB
frame = Image.fromarray(np.uint8(frame)) # 將 numpy 陣列轉換成 Image 物件
image, out_boxes = yolo.detect_image(frame) # 使用 YOLO 偵測物件,回傳影像和邊界框
frame = np.array(image) # 將 Image 物件轉換回 numpy 陣列
frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR) # 將 RGB 圖像轉換回 BGR
# 遍歷所有偵測到的邊界框
center_points = []
for top, left, bottom, right in out_boxes:
min_depth = float('inf') # 初始化最小深度為無窮大
max_depth = float('-inf') # 初始化最大深度為無窮小
min_depth_pixel = None # 初始化最小深度的像素點為 None
max_depth_pixel = None # 初始化最大深度的像素點為 None
# 遍歷邊界框內的所有像素
for y in range(int(top), int(bottom)):
for x in range(int(left), int(right)):
depth_pixel = [x, y] # 建立像素座標
dis, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) # 獲取像素點的深度
# 更新最小和最大深度
if dis > 0:
if dis < min_depth:
min_depth = dis
min_depth_pixel = depth_pixel
if dis > max_depth:
max_depth = dis
max_depth_pixel = depth_pixel
# 計算邊界框的中心點座標
center_x = int((left + right) / 2)
center_y = int((top + bottom) / 2)
depth_pixel = [center_x, center_y]
center_depth, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) # 獲取中心點的深度
# 印出最小、最大和中心深度的資訊
print(f"Minimum depth at: {min_depth_pixel}, value: {min_depth}")
print(f"Maximum depth at: {max_depth_pixel}, value: {max_depth}")
print(f"Center depth at: {depth_pixel}, value: {center_depth}")
center_points.append((center_x, center_y, center_depth))
#檢查是否有至少兩個偵測到的邊界框,並計算兩個中心點之間的距離
if len(out_boxes) >= 2:
center_x1, center_y1, center_depth1 = center_points[0]
center_x2, center_y2, center_depth2 = center_points[1]
# 計算兩個中心點之間的距離
distance_x = center_x2 - center_x1
distance_y = center_y2 - center_y1
print(f"Center points distance (x, y): ({distance_x}, {distance_y})")
cv2.setMouseCallback('RealSence', mouse_callback) # 設定滑鼠回調函數,用來處理滑鼠事件
# 在畫面上顯示 FPS
frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
# 計算並更新 FPS
fps = ( fps + (1./(time.time()-t1)) ) / 2
print("fps= %.2f"%(fps)) # 印出 FPS
cv2.imshow('RealSence',frame) # 顯示處理後的畫面
key = cv2.waitKey(1)&0xff # 讀取使用者按下的按鍵
# 計算並印出執行時間
elapsed_time = time.time() - start_time
print("Time: %.2f seconds" % elapsed_time)
# 如果使用者按下 'q',則跳出迴圈
if key == ord('q'):
break
# 如果使用者按下 's',則等待使用者再次按下按鍵
if key == ord('s'):
cv2.waitKey(0)
pipeline.stop()
cv2.destroyAllWindows()
```
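`distance_x` / `distance_y` above are only the pixel offsets between the two centers. To get the real-world distance between the two objects, both centers can be deprojected into camera coordinates and compared; a sketch that reuses `get_3d_camera_coordinate` and the pixel coordinates already stored in `center_points`:
```python=
# 3D distance between the first two detected centers (meters), assuming the helpers above
if len(center_points) >= 2:
    (x1, y1, _), (x2, y2, _) = center_points[0], center_points[1]
    _, p1 = get_3d_camera_coordinate([x1, y1], aligned_depth_frame, depth_intrin)
    _, p2 = get_3d_camera_coordinate([x2, y2], aligned_depth_frame, depth_intrin)
    distance_3d = np.linalg.norm(np.array(p1) - np.array(p2))
    print(f"3D distance between the two centers: {distance_3d:.3f} m")
```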
## Adding on-screen markers for the center point and the min/max depth points
```python=
#匯入必要的函式庫
import pyrealsense2 as rs
import numpy as np
import cv2
import time
import tensorflow as tf
from PIL import Image
from yolo import YOLO, YOLO_ONNX
#獲取系統上所有GPU裝置
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
#為每個GPU啟用記憶體增長
#可以避免Tensorflow一開始就使用所有的GPU記憶體
#而是按照需增加GPU記憶體使用量
tf.config.experimental.set_memory_growth(gpu, True)
GREEN_COLOR = (0, 255, 0)
RED_COLOR = (0, 0, 255)
YELLOW_COLOR = (0, 255, 255)
#創建並啟動realsense pipeline
pipeline = rs.pipeline()
config = rs.config()
#運用深度及彩色串流
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 15)
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 15)
#啟動pipline
pipe_profile = pipeline.start(config)
#指定對齊目標為彩色串流
align_to = rs.stream.color
#將深度串流對齊至彩色串流
align = rs.align(align_to)
#創見一個視窗名為RealSense
cv2.namedWindow('RealSence',0)
def get_aligned_images():
frames = pipeline.wait_for_frames()#從realsense piplin獲取一組新的畫面
aligned_frames = align.process(frames)#將深度影像對齊至彩色影像,並返回一組新的已對齊畫面
aligned_depth_frame = aligned_frames.get_depth_frame()#從已對齊畫面取得深度畫面
aligned_color_frame = aligned_frames.get_color_frame()#從已對齊畫面取得彩色畫面
depth_intrin = aligned_depth_frame.profile.as_video_stream_profile().intrinsics#獲取深度畫面的內部參數,如:主座標
color_intrin = aligned_color_frame.profile.as_video_stream_profile().intrinsics#獲取彩色畫面的內部參數
img_color = np.asanyarray(aligned_color_frame.get_data())#將彩色畫面轉換Numpy陣列
img_depth = np.asanyarray(aligned_depth_frame.get_data())#將深度畫面轉換為Numpy陣列
return color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame#返回內部參數,彩色圖像,深度圖像及深度畫面
def get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin):
#將輸入的深度影像座標分解為x,y部份
x = depth_pixel[0]
y = depth_pixel[1]
# 確保像素座標在有效範圍內
if x < 0 or x >= aligned_depth_frame.width or y < 0 or y >= aligned_depth_frame.height:
print("無效的像素座標。")
return 0, (0, 0, 0)
#運用rs SDK get_distance方式取得指定像素深度值
dis = aligned_depth_frame.get_distance(x, y)
camera_coordinate = rs.rs2_deproject_pixel_to_point(depth_intrin, depth_pixel, dis)#運用rs SDK rs2_dep....函數將2D像素座標轉換到3D相機座標系中的點
return dis, camera_coordinate
def mouse_callback(event, x, y, flags, param):
if event == cv2.EVENT_LBUTTONDOWN:
print("滑鼠點擊事件座標 (x, y):", x, y)
depth_pixel = [x, y]
dis, camera_coordinate = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin)
print ('深度: ',dis)
#print ('camera_coordinate: ',camera_coordinate)
#初始化幀率及YOLO模型
fps = 0.0
yolo = YOLO()
#主迴圈
if __name__=="__main__": # 確定此程式作為主程式執行
while True: # 持續執行直到使用者退出
start_time = time.time() # 紀錄程式開始的時間
t1 = time.time() # 用來計算 FPS 的時間標記
color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame = get_aligned_images() # 獲取對齊的影像和內部參數
frame = cv2.cvtColor(img_color,cv2.COLOR_BGR2RGB) # 將 BGR 圖像轉換成 RGB
frame = Image.fromarray(np.uint8(frame)) # 將 numpy 陣列轉換成 Image 物件
image, out_boxes = yolo.detect_image(frame) # 使用 YOLO 偵測物件,回傳影像和邊界框
frame = np.array(image) # 將 Image 物件轉換回 numpy 陣列
frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR) # 將 RGB 圖像轉換回 BGR
# 遍歷所有偵測到的邊界框
# 在遍歷邊界框之前,初始化變數來儲存最遠和最近的深度
min_depth_val = float('inf')
max_depth_val = float('-inf')
min_depth_point = None
max_depth_point = None
center_points = []
for top, left, bottom, right in out_boxes:
min_depth = float('inf') # 初始化最小深度為無窮大
max_depth = float('-inf') # 初始化最大深度為無窮小
min_depth_pixel = None # 初始化最小深度的像素點為 None
max_depth_pixel = None # 初始化最大深度的像素點為 None
# 遍歷邊界框內的所有像素
for y in range(int(top), int(bottom)):
for x in range(int(left), int(right)):
depth_pixel = [x, y] # 建立像素座標
dis, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) # 獲取像素點的深度
# 更新最小和最大深度
if dis > 0:
if dis < min_depth:
min_depth = dis
min_depth_pixel = depth_pixel
if dis > max_depth:
max_depth = dis
max_depth_pixel = depth_pixel
if min_depth_pixel is not None:
min_depth_val = min_depth
min_depth_point = min_depth_pixel
if max_depth_pixel is not None:
max_depth_val = max_depth
max_depth_point = max_depth_pixel
# 計算邊界框的中心點座標
center_x = int((left + right) / 2)
center_y = int((top + bottom) / 2)
depth_pixel = [center_x, center_y]
center_depth, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) # 獲取中心點的深度
# 印出最小、最大和中心深度的資訊
print(f"Minimum depth at: {min_depth_pixel}, value: {min_depth}")
print(f"Maximum depth at: {max_depth_pixel}, value: {max_depth}")
print(f"Center depth at: {depth_pixel}, value: {center_depth}")
# 在畫面上標示中心點及最大最小深度值點
cv2.circle(frame, (center_x, center_y), 5, GREEN_COLOR, -1)
if min_depth_point:
cv2.circle(frame, (min_depth_point[0], min_depth_point[1]), 5, RED_COLOR, -1)
if max_depth_point:
cv2.circle(frame, (max_depth_point[0], max_depth_point[1]), 5, YELLOW_COLOR, -1)
center_points.append((center_x, center_y, center_depth))
#檢查是否有至少兩個偵測到的邊界框,並計算兩個中心點之間的距離
if len(out_boxes) >= 2:
center_x1, center_y1, center_depth1 = center_points[0]
center_x2, center_y2, center_depth2 = center_points[1]
# 計算兩個中心點之間的距離
distance_x = center_x2 - center_x1
distance_y = center_y2 - center_y1
print(f"Center points distance (x, y): ({distance_x}, {distance_y})")
cv2.setMouseCallback('RealSence', mouse_callback) # 設定滑鼠回調函數,用來處理滑鼠事件
# 在畫面上顯示 FPS
frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
# 計算並更新 FPS
fps = ( fps + (1./(time.time()-t1)) ) / 2
print("fps= %.2f"%(fps)) # 印出 FPS
cv2.imshow('RealSence',frame) # 顯示處理後的畫面
key = cv2.waitKey(1)&0xff # 讀取使用者按下的按鍵
# 計算並印出執行時間
elapsed_time = time.time() - start_time
print("Time: %.2f seconds" % elapsed_time)
# 如果使用者按下 'q',則跳出迴圈
if key == ord('q'):
break
# 如果使用者按下 's',則等待使用者再次按下按鍵
if key == ord('s'):
cv2.waitKey(0)
pipeline.stop()
cv2.destroyAllWindows()
```
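Since this version already draws the center points, the relative position of the first two boxes can also be drawn on the frame instead of only printed; a small sketch, assuming `center_points` holds at least two entries:
```python=
# connect the first two centers and label the line with the pixel offset
if len(center_points) >= 2:
    (x1, y1, _), (x2, y2, _) = center_points[0], center_points[1]
    cv2.line(frame, (x1, y1), (x2, y2), YELLOW_COLOR, 2)
    cv2.putText(frame, f"dx={x2 - x1}, dy={y2 - y1}",
                ((x1 + x2) // 2, (y1 + y2) // 2 - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, YELLOW_COLOR, 2)
```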
{%youtube 1c-pVENgkVQ%}
## Improving the FPS
```python=
#匯入必要的函式庫
import pyrealsense2 as rs
import numpy as np
import cv2
import time
import tensorflow as tf
from PIL import Image
from yolo import YOLO, YOLO_ONNX
#獲取系統上所有GPU裝置
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
#為每個GPU啟用記憶體增長
#可以避免Tensorflow一開始就使用所有的GPU記憶體
#而是按照需增加GPU記憶體使用量
tf.config.experimental.set_memory_growth(gpu, True)
GREEN_COLOR = (0, 255, 0)
RED_COLOR = (0, 0, 255)
YELLOW_COLOR = (0, 255, 255)
#創建並啟動realsense pipeline
pipeline = rs.pipeline()
config = rs.config()
#運用深度及彩色串流
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 15)
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 15)
#啟動pipline
pipe_profile = pipeline.start(config)
#指定對齊目標為彩色串流
align_to = rs.stream.color
#將深度串流對齊至彩色串流
align = rs.align(align_to)
#創見一個視窗名為RealSense
cv2.namedWindow('RealSence',0)
def get_aligned_images():
frames = pipeline.wait_for_frames()#從realsense piplin獲取一組新的畫面
aligned_frames = align.process(frames)#將深度影像對齊至彩色影像,並返回一組新的已對齊畫面
aligned_depth_frame = aligned_frames.get_depth_frame()#從已對齊畫面取得深度畫面
aligned_color_frame = aligned_frames.get_color_frame()#從已對齊畫面取得彩色畫面
depth_intrin = aligned_depth_frame.profile.as_video_stream_profile().intrinsics#獲取深度畫面的內部參數,如:主座標
color_intrin = aligned_color_frame.profile.as_video_stream_profile().intrinsics#獲取彩色畫面的內部參數
img_color = np.asanyarray(aligned_color_frame.get_data())#將彩色畫面轉換Numpy陣列
img_depth = np.asanyarray(aligned_depth_frame.get_data())#將深度畫面轉換為Numpy陣列
# 將彩色圖像調整為較小的解析度
resized_color = cv2.resize(img_color, (320, 240))
return color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame, resized_color#返回內部參數,彩色圖像,深度圖像及深度畫面
def get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin):
#將輸入的深度影像座標分解為x,y部份
x = depth_pixel[0]
y = depth_pixel[1]
# 確保像素座標在有效範圍內
if x < 0 or x >= aligned_depth_frame.width or y < 0 or y >= aligned_depth_frame.height:
print("無效的像素座標。")
return 0, (0, 0, 0)
#運用rs SDK get_distance方式取得指定像素深度值
dis = aligned_depth_frame.get_distance(x, y)
camera_coordinate = rs.rs2_deproject_pixel_to_point(depth_intrin, depth_pixel, dis)#運用rs SDK rs2_dep....函數將2D像素座標轉換到3D相機座標系中的點
return dis, camera_coordinate
def mouse_callback(event, x, y, flags, param):
if event == cv2.EVENT_LBUTTONDOWN:
print("滑鼠點擊事件座標 (x, y):", x, y)
depth_pixel = [x, y]
dis, camera_coordinate = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin)
print ('深度: ',dis)
#print ('camera_coordinate: ',camera_coordinate)
#初始化幀率及YOLO模型
fps = 0.0
yolo = YOLO()
yolo.confidence = 0.3
frame_skip = 5
skip_counter = 0
#主迴圈
if __name__=="__main__": # 確定此程式作為主程式執行
while True: # 持續執行直到使用者退出
start_time = time.time() # 紀錄程式開始的時間
t1 = time.time()
color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame , resized_color= get_aligned_images() # 獲取對齊的影像和內部參數
frame = cv2.cvtColor(img_color,cv2.COLOR_BGR2RGB) # 將 BGR 圖像轉換成 RGB
frame = Image.fromarray(np.uint8(frame)) # 將 numpy 陣列轉換成 Image 物件
image, out_boxes = yolo.detect_image(frame) # 使用 YOLO 偵測物件,回傳影像和邊界框
frame = np.array(image) # 將 Image 物件轉換回 numpy 陣列
frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR) # 將 RGB 圖像轉換回 BGR
# 遍歷所有偵測到的邊界框
# 在遍歷邊界框之前,初始化變數來儲存最遠和最近的深度
min_depth_val = float('inf')
max_depth_val = float('-inf')
min_depth_point = None
max_depth_point = None
center_points = []
for top, left, bottom, right in out_boxes:
min_depth = float('inf') # 初始化最小深度為無窮大
max_depth = float('-inf') # 初始化最大深度為無窮小
min_depth_pixel = None # 初始化最小深度的像素點為 None
max_depth_pixel = None # 初始化最大深度的像素點為 None
# 遍歷邊界框內的所有像素
for y in range(int(top), int(bottom)):
for x in range(int(left), int(right)):
depth_pixel = [x, y] # 建立像素座標
dis, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) # 獲取像素點的深度
# 更新最小和最大深度
if dis > 0:
if dis < min_depth:
min_depth = dis
min_depth_pixel = depth_pixel
if dis > max_depth:
max_depth = dis
max_depth_pixel = depth_pixel
if min_depth_pixel is not None:
min_depth_val = min_depth
min_depth_point = min_depth_pixel
if max_depth_pixel is not None:
max_depth_val = max_depth
max_depth_point = max_depth_pixel
# 計算邊界框的中心點座標
center_x = int((left + right) / 2)
center_y = int((top + bottom) / 2)
depth_pixel = [center_x, center_y]
center_depth, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) # 獲取中心點的深度
# 印出最小、最大和中心深度的資訊
print(f"Minimum depth at: {min_depth_pixel}, value: {min_depth}")
print(f"Maximum depth at: {max_depth_pixel}, value: {max_depth}")
print(f"Center depth at: {depth_pixel}, value: {center_depth}")
# 在畫面上標示中心點及最大最小深度值點
cv2.circle(frame, (center_x, center_y), 5, GREEN_COLOR, -1)
if min_depth_point:
cv2.circle(frame, (min_depth_point[0], min_depth_point[1]), 5, RED_COLOR, -1)
if max_depth_point:
cv2.circle(frame, (max_depth_point[0], max_depth_point[1]), 5, YELLOW_COLOR, -1)
center_points.append((center_x, center_y, center_depth))
#檢查是否有至少兩個偵測到的邊界框,並計算兩個中心點之間的距離
if len(out_boxes) >= 2:
center_x1, center_y1, center_depth1 = center_points[0]
center_x2, center_y2, center_depth2 = center_points[1]
# 計算兩個中心點之間的距離
distance_x = center_x2 - center_x1
distance_y = center_y2 - center_y1
print(f"Center points distance (x, y): ({distance_x}, {distance_y})")
cv2.setMouseCallback('RealSence', mouse_callback) # 設定滑鼠回調函數,用來處理滑鼠事件
# 在畫面上顯示 FPS
frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
# 計算並更新 FPS
fps = ( fps + (1./(time.time()-t1)) ) / 2
print("fps= %.2f"%(fps)) # 印出 FPS
cv2.imshow('RealSence',frame) # 顯示處理後的畫面
key = cv2.waitKey(1)&0xff # 讀取使用者按下的按鍵
# 計算並印出執行時間
elapsed_time = time.time() - start_time
print("Time: %.2f seconds" % elapsed_time)
# 如果使用者按下 'q',則跳出迴圈
if key == ord('q'):
break
# 如果使用者按下 's',則等待使用者再次按下按鍵
if key == ord('s'):
cv2.waitKey(0)
pipeline.stop()
cv2.destroyAllWindows()
```
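In this version `frame_skip`, `skip_counter`, and `resized_color` are defined but never actually used, so the per-frame work is still the same as before. One way to put the counters to work is to run YOLO only every few frames and reuse the previous boxes in between; a hedged sketch of how the detection step in the main loop could change (it assumes a `last_boxes = []` initialized before the `while` loop):
```python=
# run detection only every frame_skip frames; otherwise reuse the boxes from the last run
skip_counter += 1
if skip_counter >= frame_skip:
    skip_counter = 0
    image, out_boxes = yolo.detect_image(frame)   # frame is the PIL image built above
    frame = np.array(image)
    last_boxes = out_boxes
else:
    out_boxes = last_boxes
    frame = np.array(frame)                       # keep showing the current color frame
```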
## ==Using contour and edge detection to find the object's min and max depth==
```python=
#匯入必要的函式庫
import pyrealsense2 as rs
import numpy as np
import cv2
import time
import tensorflow as tf
from PIL import Image
from yolo import YOLO, YOLO_ONNX
#獲取系統上所有GPU裝置
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
#為每個GPU啟用記憶體增長
#可以避免Tensorflow一開始就使用所有的GPU記憶體
#而是按照需增加GPU記憶體使用量
tf.config.experimental.set_memory_growth(gpu, True)
GREEN_COLOR = (0, 255, 0)
RED_COLOR = (0, 0, 255)
YELLOW_COLOR = (0, 255, 255)
#創建並啟動realsense pipeline
pipeline = rs.pipeline()
config = rs.config()
#運用深度及彩色串流
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 15)
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 15)
#啟動pipline
pipe_profile = pipeline.start(config)
#指定對齊目標為彩色串流
align_to = rs.stream.color
#將深度串流對齊至彩色串流
align = rs.align(align_to)
#創見一個視窗名為RealSense
cv2.namedWindow('RealSence',0)
def get_aligned_images():
frames = pipeline.wait_for_frames()#從realsense piplin獲取一組新的畫面
aligned_frames = align.process(frames)#將深度影像對齊至彩色影像,並返回一組新的已對齊畫面
aligned_depth_frame = aligned_frames.get_depth_frame()#從已對齊畫面取得深度畫面
aligned_color_frame = aligned_frames.get_color_frame()#從已對齊畫面取得彩色畫面
depth_intrin = aligned_depth_frame.profile.as_video_stream_profile().intrinsics#獲取深度畫面的內部參數,如:主座標
color_intrin = aligned_color_frame.profile.as_video_stream_profile().intrinsics#獲取彩色畫面的內部參數
img_color = np.asanyarray(aligned_color_frame.get_data())#將彩色畫面轉換Numpy陣列
img_depth = np.asanyarray(aligned_depth_frame.get_data())#將深度畫面轉換為Numpy陣列
# 將彩色圖像調整為較小的解析度
resized_color = cv2.resize(img_color, (320, 240))
return color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame, resized_color#返回內部參數,彩色圖像,深度圖像及深度畫面
def get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin):
#將輸入的深度影像座標分解為x,y部份
x = depth_pixel[0]
y = depth_pixel[1]
# 確保像素座標在有效範圍內
if x < 0 or x >= aligned_depth_frame.width or y < 0 or y >= aligned_depth_frame.height:
print("無效的像素座標。")
return 0, (0, 0, 0)
#運用rs SDK get_distance方式取得指定像素深度值
dis = aligned_depth_frame.get_distance(x, y)
camera_coordinate = rs.rs2_deproject_pixel_to_point(depth_intrin, depth_pixel, dis)#運用rs SDK rs2_dep....函數將2D像素座標轉換到3D相機座標系中的點
return dis, camera_coordinate
def mouse_callback(event, x, y, flags, param):
if event == cv2.EVENT_LBUTTONDOWN:
print("滑鼠點擊事件座標 (x, y):", x, y)
depth_pixel = [x, y]
dis, camera_coordinate = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin)
print ('深度: ',dis)
#print ('camera_coordinate: ',camera_coordinate)
#初始化幀率及YOLO模型
fps = 0.0
yolo = YOLO()
yolo.confidence = 0.3
frame_skip = 5
skip_counter = 0
#主迴圈
if __name__=="__main__": # 確定此程式作為主程式執行
while True: # 持續執行直到使用者退出
start_time = time.time() # 紀錄程式開始的時間
t1 = time.time()
color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame , resized_color= get_aligned_images() # 獲取對齊的影像和內部參數
frame = cv2.cvtColor(img_color,cv2.COLOR_BGR2RGB) # 將 BGR 圖像轉換成 RGB
frame = Image.fromarray(np.uint8(frame)) # 將 numpy 陣列轉換成 Image 物件
image, out_boxes = yolo.detect_image(frame) # 使用 YOLO 偵測物件,回傳影像和邊界框
frame = np.array(image) # 將 Image 物件轉換回 numpy 陣列
frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR) # 將 RGB 圖像轉換回 BGR
# 遍歷所有偵測到的邊界框
# 在遍歷邊界框之前,初始化變數來儲存最遠和最近的深度
min_depth_val = float('inf')
max_depth_val = float('-inf')
min_depth_point = None
max_depth_point = None
center_points = []
for top, left, bottom, right in out_boxes:
roi_color = img_color[int(top):int(bottom), int(left):int(right)]
roi_gray = cv2.cvtColor(roi_color, cv2.COLOR_BGR2GRAY)
edges = cv2.Canny(roi_gray, 50, 150)
min_depth = float('inf') # 初始化最小深度為無窮大
max_depth = float('-inf') # 初始化最大深度為無窮小
min_depth_pixel = None # 初始化最小深度的像素點為 None
max_depth_pixel = None # 初始化最大深度的像素點為 None
# 遍歷邊界框內的所有像素
edge_points = np.argwhere(edges > 0)
for point in edge_points:
y, x = point
depth_pixel = [int(left) + x, int(top) + y]
dis, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin)
# 更新最小和最大深度
if dis > 0:
if dis < min_depth:
min_depth = dis
min_depth_pixel = depth_pixel
if dis > max_depth:
max_depth = dis
max_depth_pixel = depth_pixel
if min_depth_pixel is not None:
cv2.circle(frame, (min_depth_pixel[0], min_depth_pixel[1]), 5, RED_COLOR, -1)
if max_depth_pixel is not None:
cv2.circle(frame, (max_depth_pixel[0], max_depth_pixel[1]), 5, YELLOW_COLOR, -1)
# 計算邊界框的中心點座標
center_x = int((left + right) / 2)
center_y = int((top + bottom) / 2)
depth_pixel = [center_x, center_y]
center_depth, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) # 獲取中心點的深度
# 印出最小、最大和中心深度的資訊
print(f"Minimum depth at: {min_depth_pixel}, value: {min_depth}")
print(f"Maximum depth at: {max_depth_pixel}, value: {max_depth}")
print(f"Center depth at: {depth_pixel}, value: {center_depth}")
# 在畫面上標示中心點及最大最小深度值點
cv2.circle(frame, (center_x, center_y), 5, GREEN_COLOR, -1)
center_points.append((center_x, center_y, center_depth))
#檢查是否有至少兩個偵測到的邊界框,並計算兩個中心點之間的距離
if len(out_boxes) >= 2:
center_x1, center_y1, center_depth1 = center_points[0]
center_x2, center_y2, center_depth2 = center_points[1]
# 計算兩個中心點之間的距離
distance_x = center_x2 - center_x1
distance_y = center_y2 - center_y1
print(f"Center points distance (x, y): ({distance_x}, {distance_y})")
cv2.setMouseCallback('RealSence', mouse_callback) # 設定滑鼠回調函數,用來處理滑鼠事件
# 在畫面上顯示 FPS
frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
# 計算並更新 FPS
fps = ( fps + (1./(time.time()-t1)) ) / 2
print("fps= %.2f"%(fps)) # 印出 FPS
cv2.imshow('RealSence',frame) # 顯示處理後的畫面
key = cv2.waitKey(1)&0xff # 讀取使用者按下的按鍵
# 計算並印出執行時間
elapsed_time = time.time() - start_time
print("Time: %.2f seconds" % elapsed_time)
# 如果使用者按下 'q',則跳出迴圈
if key == ord('q'):
break
# 如果使用者按下 's',則等待使用者再次按下按鍵
if key == ord('s'):
cv2.waitKey(0)
pipeline.stop()
cv2.destroyAllWindows()
```
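The loop above samples depth on every Canny edge pixel. Since the section title also mentions contours, another option is to extract only the largest contour from the edge map and sample along it, which usually follows the object outline more cleanly; a sketch assuming OpenCV 4's two-value `findContours` return and the same `edges` ROI:
```python=
# sample depth only along the largest contour found in the Canny edge map
contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
if contours:
    largest = max(contours, key=cv2.contourArea)
    for px, py in largest.reshape(-1, 2):          # contour points are (x, y) inside the ROI
        depth_pixel = [int(left) + int(px), int(top) + int(py)]
        dis, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin)
        # update min/max depth here exactly as in the loop above
```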
{%youtube MipJxfCVfQM%}
## ==Using foreground/background separation to get the object's min, max, and center depth points==
```python=
#匯入必要的函式庫
import pyrealsense2 as rs
import numpy as np
import cv2
import time
import tensorflow as tf
from PIL import Image
from yolo import YOLO, YOLO_ONNX
#獲取系統上所有GPU裝置
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
#為每個GPU啟用記憶體增長
#可以避免Tensorflow一開始就使用所有的GPU記憶體
#而是按照需增加GPU記憶體使用量
tf.config.experimental.set_memory_growth(gpu, True)
GREEN_COLOR = (0, 255, 0)
RED_COLOR = (0, 0, 255)
YELLOW_COLOR = (0, 255, 255)
#創建並啟動realsense pipeline
pipeline = rs.pipeline()
config = rs.config()
#運用深度及彩色串流
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 15)
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 15)
#啟動pipline
pipe_profile = pipeline.start(config)
#指定對齊目標為彩色串流
align_to = rs.stream.color
#將深度串流對齊至彩色串流
align = rs.align(align_to)
#創見一個視窗名為RealSense
cv2.namedWindow('RealSence',0)
def get_aligned_images():
frames = pipeline.wait_for_frames()#從realsense piplin獲取一組新的畫面
aligned_frames = align.process(frames)#將深度影像對齊至彩色影像,並返回一組新的已對齊畫面
aligned_depth_frame = aligned_frames.get_depth_frame()#從已對齊畫面取得深度畫面
aligned_color_frame = aligned_frames.get_color_frame()#從已對齊畫面取得彩色畫面
depth_intrin = aligned_depth_frame.profile.as_video_stream_profile().intrinsics#獲取深度畫面的內部參數,如:主座標
color_intrin = aligned_color_frame.profile.as_video_stream_profile().intrinsics#獲取彩色畫面的內部參數
img_color = np.asanyarray(aligned_color_frame.get_data())#將彩色畫面轉換Numpy陣列
img_depth = np.asanyarray(aligned_depth_frame.get_data())#將深度畫面轉換為Numpy陣列
# 將彩色圖像調整為較小的解析度
resized_color = cv2.resize(img_color, (320, 240))
return color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame, resized_color#返回內部參數,彩色圖像,深度圖像及深度畫面
def get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin):
#將輸入的深度影像座標分解為x,y部份
x = depth_pixel[0]
y = depth_pixel[1]
# 確保像素座標在有效範圍內
if x < 0 or x >= aligned_depth_frame.width or y < 0 or y >= aligned_depth_frame.height:
print("無效的像素座標。")
return 0, (0, 0, 0)
#運用rs SDK get_distance方式取得指定像素深度值
dis = aligned_depth_frame.get_distance(x, y)
camera_coordinate = rs.rs2_deproject_pixel_to_point(depth_intrin, depth_pixel, dis)#運用rs SDK rs2_dep....函數將2D像素座標轉換到3D相機座標系中的點
return dis, camera_coordinate
def mouse_callback(event, x, y, flags, param):
if event == cv2.EVENT_LBUTTONDOWN:
print("滑鼠點擊事件座標 (x, y):", x, y)
depth_pixel = [x, y]
dis, camera_coordinate = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin)
print ('深度: ',dis)
#print ('camera_coordinate: ',camera_coordinate)
#初始化幀率及YOLO模型
fps = 0.0
yolo = YOLO()
yolo.confidence = 0.3
frame_skip = 5
skip_counter = 0
fgbg = cv2.createBackgroundSubtractorMOG2()
#主迴圈
if __name__=="__main__": # 確定此程式作為主程式執行
while True: # 持續執行直到使用者退出
start_time = time.time() # 紀錄程式開始的時間
t1 = time.time()
color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame , resized_color= get_aligned_images() # 獲取對齊的影像和內部參數
# 進行前後景分離
fgmask = fgbg.apply(img_color)
# 將前景mask應用到彩色圖像上,以得到只有前景的彩色圖像
fgimg = cv2.bitwise_and(img_color, img_color, mask=fgmask)
# 你可以選擇對fgimg(前景圖像)或img_color(原始圖像)進行後續處理
# frame = cv2.cvtColor(fgimg, cv2.COLOR_BGR2RGB)  # 若要改用前景圖像,取消此行註解並移除下一行
frame = cv2.cvtColor(img_color,cv2.COLOR_BGR2RGB) # 將 BGR 圖像轉換成 RGB(此處選擇原始圖像)
frame = Image.fromarray(np.uint8(frame)) # 將 numpy 陣列轉換成 Image 物件
image, out_boxes = yolo.detect_image(frame) # 使用 YOLO 偵測物件,回傳影像和邊界框
frame = np.array(image) # 將 Image 物件轉換回 numpy 陣列
frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR) # 將 RGB 圖像轉換回 BGR
# 遍歷所有偵測到的邊界框
# 在遍歷邊界框之前,初始化變數來儲存最遠和最近的深度
min_depth_val = float('inf')
max_depth_val = float('-inf')
min_depth_point = None
max_depth_point = None
center_points = []
for top, left, bottom, right in out_boxes:
min_depth = float('inf') # 初始化最小深度為無窮大
max_depth = float('-inf') # 初始化最大深度為無窮小
min_depth_pixel = None # 初始化最小深度的像素點為 None
max_depth_pixel = None # 初始化最大深度的像素點為 None
# 遍歷邊界框內的所有像素
dis = 0
for y in range(int(top), int(bottom)):
for x in range(int(left), int(right)):
if fgmask[y, x] != 0:
depth_pixel = [x, y]
# 獲取深度值,這取決於你的深度圖像的表示
dis = aligned_depth_frame.get_distance(x, y) # 只是一個示例,你可能需要自己的深度獲取方法
# 更新最小和最大深度
if dis > 0:
if dis < min_depth:
min_depth = dis
min_depth_pixel = depth_pixel
if dis > max_depth:
max_depth = dis
max_depth_pixel = depth_pixel
if min_depth_pixel is not None:
min_depth_val = min_depth
min_depth_point = min_depth_pixel
if max_depth_pixel is not None:
max_depth_val = max_depth
max_depth_point = max_depth_pixel
# 計算邊界框的中心點座標
center_x = int((left + right) / 2)
center_y = int((top + bottom) / 2)
depth_pixel = [center_x, center_y]
center_depth, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin) # 獲取中心點的深度
# 印出最小、最大和中心深度的資訊
print(f"Minimum depth at: {min_depth_pixel}, value: {min_depth}")
print(f"Maximum depth at: {max_depth_pixel}, value: {max_depth}")
print(f"Center depth at: {depth_pixel}, value: {center_depth}")
# 在畫面上標示中心點及最大最小深度值點
cv2.circle(frame, (center_x, center_y), 5, GREEN_COLOR, -1)
if min_depth_point:
cv2.circle(frame, (min_depth_point[0], min_depth_point[1]), 5, RED_COLOR, -1)
if max_depth_point:
cv2.circle(frame, (max_depth_point[0], max_depth_point[1]), 5, YELLOW_COLOR, -1)
center_points.append((center_x, center_y, center_depth))
#檢查是否有至少兩個偵測到的邊界框,並計算兩個中心點之間的距離
if len(out_boxes) >= 2:
center_x1, center_y1, center_depth1 = center_points[0]
center_x2, center_y2, center_depth2 = center_points[1]
# 計算兩個中心點之間的距離
distance_x = center_x2 - center_x1
distance_y = center_y2 - center_y1
print(f"Center points distance (x, y): ({distance_x}, {distance_y})")
cv2.setMouseCallback('RealSence', mouse_callback) # 設定滑鼠回調函數,用來處理滑鼠事件
# 在畫面上顯示 FPS
frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
# 計算並更新 FPS
fps = ( fps + (1./(time.time()-t1)) ) / 2
print("fps= %.2f"%(fps)) # 印出 FPS
cv2.imshow('RealSence',frame) # 顯示處理後的畫面
cv2.imshow('Foreground', fgimg)
key = cv2.waitKey(1)&0xff # 讀取使用者按下的按鍵
# 計算並印出執行時間
elapsed_time = time.time() - start_time
print("Time: %.2f seconds" % elapsed_time)
# 如果使用者按下 'q',則跳出迴圈
if key == ord('q'):
break
# 如果使用者按下 's',則等待使用者再次按下按鍵
if key == ord('s'):
cv2.waitKey(0)
pipeline.stop()
cv2.destroyAllWindows()
```
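The block above feeds the raw MOG2 mask (`fgmask`) straight into the per-pixel depth scan. By default `cv2.createBackgroundSubtractorMOG2()` also marks shadows as gray (value 127), so shadow and speckle pixels can slip through the `fgmask[y, x] != 0` test. Here is a small optional cleanup sketch; `clean_foreground_mask` is only an illustrative helper, not part of the code above:
```python=
import cv2
import numpy as np

fgbg = cv2.createBackgroundSubtractorMOG2()  # detectShadows is True by default; shadows show up as 127

def clean_foreground_mask(frame, fgbg):
    """Apply MOG2, drop shadow pixels, and remove small speckles with a morphological open."""
    fgmask = fgbg.apply(frame)
    _, fgmask = cv2.threshold(fgmask, 200, 255, cv2.THRESH_BINARY)  # keep only confident foreground (255)
    kernel = np.ones((3, 3), np.uint8)
    fgmask = cv2.morphologyEx(fgmask, cv2.MORPH_OPEN, kernel)
    return fgmask
```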
{%youtube 4pSuyYCnBjc%}
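The per-box scan in these examples reads the depth one pixel at a time from Python (`get_distance()` or `get_3d_camera_coordinate()`), which gets slow for large boxes. Since `get_aligned_images()` already returns the aligned depth image as a raw uint16 numpy array (`img_depth`), the same min/max search can be vectorized. Below is a minimal sketch, assuming the `(top, left, bottom, right)` box format used above; `box_depth_stats` and the depth-scale lookup are illustrative additions, not part of the original code:
```python=
import numpy as np

# Scale that converts raw uint16 depth values to meters (pipe_profile comes from pipeline.start(config))
depth_scale = pipe_profile.get_device().first_depth_sensor().get_depth_scale()

def box_depth_stats(img_depth, box, depth_scale):
    """Min/max depth (in meters) and their pixel positions inside a (top, left, bottom, right) box.
    Pixels with no depth reading (raw value 0) are ignored; returns None if the box has no valid depth."""
    top, left, bottom, right = [int(v) for v in box]
    h, w = img_depth.shape[:2]
    top, bottom = max(0, top), min(h, bottom)    # clamp the box to the image
    left, right = max(0, left), min(w, right)
    roi = img_depth[top:bottom, left:right].astype(np.float32) * depth_scale
    valid = roi > 0
    if not np.any(valid):
        return None
    min_idx = np.unravel_index(np.argmin(np.where(valid, roi, np.inf)), roi.shape)
    max_idx = np.unravel_index(np.argmax(np.where(valid, roi, -np.inf)), roi.shape)
    min_pixel = (left + min_idx[1], top + min_idx[0])  # back to full-image (x, y)
    max_pixel = (left + max_idx[1], top + max_idx[0])
    return roi[min_idx], min_pixel, roi[max_idx], max_pixel
```
Calling this once per bounding box replaces the nested pixel loops and keeps the per-frame cost roughly constant.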
## Displaying the desired information directly in the right-hand window
```python=
# Import the required libraries
import pyrealsense2 as rs
import numpy as np
import cv2
import time
import tensorflow as tf
from PIL import Image
from yolo import YOLO, YOLO_ONNX

# List every GPU device on the system
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
    # Enable memory growth on each GPU so that TensorFlow does not grab all
    # of the GPU memory at start-up, but grows its usage as needed
    tf.config.experimental.set_memory_growth(gpu, True)

# BGR colors used for the markers
GREEN_COLOR = (0, 255, 0)
RED_COLOR = (0, 0, 255)
YELLOW_COLOR = (0, 255, 255)

# Create a canvas large enough to hold two views side by side
canvas_height, canvas_width = 480, 1280
canvas = np.zeros((canvas_height, canvas_width, 3), dtype=np.uint8)
# Views onto the left half and the right half of the canvas
left_half = canvas[:, :canvas_width // 2, :]
right_half = canvas[:, canvas_width // 2:, :]

# Create and start the RealSense pipeline
pipeline = rs.pipeline()
config = rs.config()
# Enable the depth and color streams
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 15)
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 15)
# Start the pipeline
pipe_profile = pipeline.start(config)
# Align the depth stream to the color stream
align_to = rs.stream.color
align = rs.align(align_to)
# Create a window named RealSense
cv2.namedWindow('RealSense', 0)

def get_aligned_images():
    frames = pipeline.wait_for_frames()     # Get a new set of frames from the RealSense pipeline
    aligned_frames = align.process(frames)  # Align the depth frame to the color frame and return the aligned set
    aligned_depth_frame = aligned_frames.get_depth_frame()  # Aligned depth frame
    aligned_color_frame = aligned_frames.get_color_frame()  # Aligned color frame
    depth_intrin = aligned_depth_frame.profile.as_video_stream_profile().intrinsics  # Depth intrinsics (e.g. principal point)
    color_intrin = aligned_color_frame.profile.as_video_stream_profile().intrinsics  # Color intrinsics
    img_color = np.asanyarray(aligned_color_frame.get_data())  # Color frame as a numpy array
    img_depth = np.asanyarray(aligned_depth_frame.get_data())  # Depth frame as a numpy array
    # Downscale the color image to a smaller resolution
    resized_color = cv2.resize(img_color, (320, 240))
    # Return the intrinsics, the color image, the depth image, the depth frame and the resized color image
    return color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame, resized_color

def get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin):
    # Split the input pixel coordinate into its x and y parts
    x = depth_pixel[0]
    y = depth_pixel[1]
    # Make sure the pixel coordinate lies inside the frame
    if x < 0 or x >= aligned_depth_frame.width or y < 0 or y >= aligned_depth_frame.height:
        print("Invalid pixel coordinate.")
        return 0, (0, 0, 0)
    # Use the RealSense SDK get_distance to read the depth at that pixel
    dis = aligned_depth_frame.get_distance(x, y)
    # Use rs2_deproject_pixel_to_point to map the 2D pixel to a 3D point in the camera frame
    camera_coordinate = rs.rs2_deproject_pixel_to_point(depth_intrin, depth_pixel, dis)
    return dis, camera_coordinate

def mouse_callback(event, x, y, flags, param):
    if event == cv2.EVENT_LBUTTONDOWN:
        print("Mouse click at (x, y):", x, y)
        depth_pixel = [x, y]
        dis, camera_coordinate = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin)
        print('Depth: ', dis)
        # print('camera_coordinate: ', camera_coordinate)

# Initialize the frame rate and the YOLO model
fps = 0.0
yolo = YOLO()
yolo.confidence = 0.3
frame_skip = 5
skip_counter = 0
# Main loop
if __name__ == "__main__":  # Run only when executed as the main program
    while True:  # Keep running until the user quits
        start_time = time.time()  # Record the start time of this iteration
        t1 = time.time()
        color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame, resized_color = get_aligned_images()  # Get the aligned images and the intrinsics
        frame = cv2.cvtColor(img_color, cv2.COLOR_BGR2RGB)  # Convert the BGR image to RGB
        frame = Image.fromarray(np.uint8(frame))  # Convert the numpy array to a PIL Image
        image, out_boxes = yolo.detect_image(frame)  # Run YOLO detection; returns the drawn image and the bounding boxes
        frame = np.array(image)  # Convert the PIL Image back to a numpy array
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        # Iterate over all detected bounding boxes;
        # before the loop, initialize the variables that track the nearest and farthest depths
        min_depth_val = float('inf')
        max_depth_val = float('-inf')
        min_depth_point = None
        max_depth_point = None
        center_points = []
        for top, left, bottom, right in out_boxes:
            min_depth = float('inf')   # Minimum depth, initialized to infinity
            max_depth = float('-inf')  # Maximum depth, initialized to negative infinity
            min_depth_pixel = None     # Pixel with the minimum depth
            max_depth_pixel = None     # Pixel with the maximum depth
            # Iterate over every pixel inside the bounding box
            for y in range(int(top), int(bottom)):
                for x in range(int(left), int(right)):
                    depth_pixel = [x, y]  # Pixel coordinate
                    dis, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin)  # Depth at this pixel
                    # Update the minimum and maximum depth
                    if dis > 0:
                        if dis < min_depth:
                            min_depth = dis
                            min_depth_pixel = depth_pixel
                        if dis > max_depth:
                            max_depth = dis
                            max_depth_pixel = depth_pixel
            if min_depth_pixel is not None:
                min_depth_val = min_depth
                min_depth_point = min_depth_pixel
            if max_depth_pixel is not None:
                max_depth_val = max_depth
                max_depth_point = max_depth_pixel
            # Compute the center coordinates of the bounding box
            center_x = int((left + right) / 2)
            center_y = int((top + bottom) / 2)
            depth_pixel = [center_x, center_y]
            center_depth, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin)  # Depth at the center point
            # Print the minimum, maximum and center depth information
            print(f"Minimum depth at: {min_depth_pixel}, value: {min_depth}")
            print(f"Maximum depth at: {max_depth_pixel}, value: {max_depth}")
            print(f"Center depth at: {depth_pixel}, value: {center_depth}")
            # Mark the center point and the min/max depth points on the frame
            cv2.circle(frame, (center_x, center_y), 5, GREEN_COLOR, -1)
            if min_depth_point:
                cv2.circle(frame, (min_depth_point[0], min_depth_point[1]), 5, RED_COLOR, -1)
            if max_depth_point:
                cv2.circle(frame, (max_depth_point[0], max_depth_point[1]), 5, YELLOW_COLOR, -1)
            center_points.append((center_x, center_y, center_depth))
        # If at least two boxes were detected, compute the distance between the first two center points
        if len(out_boxes) >= 2:
            center_x1, center_y1, center_depth1 = center_points[0]
            center_x2, center_y2, center_depth2 = center_points[1]
            # Distance between the two center points
            distance_x = center_x2 - center_x1
            distance_y = center_y2 - center_y1
            print(f"Center points distance (x, y): ({distance_x}, {distance_y})")
        cv2.setMouseCallback('RealSense', mouse_callback)  # Register the mouse callback to handle click events
        # Draw the FPS on the frame
        frame = cv2.putText(frame, "fps= %.2f" % (fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        # Compute and update the FPS (running average)
        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %.2f" % (fps))  # Print the FPS
        # Compose the canvas: detection image on the left, object labels on the right
        left_half[:, :, :] = frame
        right_half[:, :, :] = 0  # Clear last frame's labels so they do not pile up
        for i, (top, left, bottom, right) in enumerate(out_boxes):
            object_class = yolo.class_names[i]  # Object label (indexed by detection order here; your YOLO wrapper may return class ids instead)
            text = f"Object {i + 1}: {object_class}"
            # Show the label on the left (color image) half and on the right half
            cv2.putText(left_half, text, (50, 50 + i * 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
            cv2.putText(right_half, text, (50, 50 + i * 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
        cv2.imshow('RealSense', canvas)  # Show the composed canvas
        key = cv2.waitKey(1) & 0xff  # Read the key pressed by the user
        # Compute and print the elapsed time of this iteration
        elapsed_time = time.time() - start_time
        print("Time: %.2f seconds" % elapsed_time)
        # Quit when the user presses 'q'
        if key == ord('q'):
            break
        # Pause until another key is pressed when the user presses 's'
        if key == ord('s'):
            cv2.waitKey(0)
    pipeline.stop()
    cv2.destroyAllWindows()
```
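`left_half` and `right_half` above are numpy slices of `canvas`, so they are views rather than copies: whatever gets written into them (a full-frame assignment, `cv2.putText`) appears on `canvas` directly, and a single `cv2.imshow('RealSense', canvas)` shows both halves. A tiny standalone sketch of the same idea:
```python=
import numpy as np
import cv2

canvas = np.zeros((480, 1280, 3), dtype=np.uint8)
left_half = canvas[:, :640, :]   # a view into canvas, not a copy
right_half = canvas[:, 640:, :]

left_half[:, :, :] = (40, 40, 40)  # fill the left half with dark gray
cv2.putText(right_half, "labels go here", (50, 50),
            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

# Both edits are visible on the shared canvas
cv2.imshow('canvas demo', canvas)
cv2.waitKey(0)
cv2.destroyAllWindows()
```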
## Showing the simple spatial relations between objects
```python=
# Import the required libraries
import pyrealsense2 as rs
import numpy as np
import cv2
import time
import tensorflow as tf
from PIL import Image
from yolo import YOLO, YOLO_ONNX

# List every GPU device on the system
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
    # Enable memory growth on each GPU so that TensorFlow does not grab all
    # of the GPU memory at start-up, but grows its usage as needed
    tf.config.experimental.set_memory_growth(gpu, True)

# BGR colors used for the markers
GREEN_COLOR = (0, 255, 0)
RED_COLOR = (0, 0, 255)
YELLOW_COLOR = (0, 255, 255)

# Use a larger canvas this time; adjust the size to your screen if needed
canvas_height, canvas_width = 720, 1800
# Create a canvas large enough to hold the views
canvas = np.zeros((canvas_height, canvas_width, 3), dtype=np.uint8)
# View onto the left half of the canvas
left_half = canvas[:, :canvas_width // 2, :]
# Views onto the upper and lower parts of the right half
top_half = canvas[:canvas_height // 2, canvas_width // 2:, :]
bottom_half = canvas[canvas_height // 2:, canvas_width // 2:, :]
displayed_relations = set()

# Create and start the RealSense pipeline
pipeline = rs.pipeline()
config = rs.config()
# Enable the depth and color streams
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 15)
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 15)
# Start the pipeline
pipe_profile = pipeline.start(config)
# Align the depth stream to the color stream
align_to = rs.stream.color
align = rs.align(align_to)
# Create a window named RealSense
cv2.namedWindow('RealSense', 0)

def get_aligned_images():
    frames = pipeline.wait_for_frames()     # Get a new set of frames from the RealSense pipeline
    aligned_frames = align.process(frames)  # Align the depth frame to the color frame and return the aligned set
    aligned_depth_frame = aligned_frames.get_depth_frame()  # Aligned depth frame
    aligned_color_frame = aligned_frames.get_color_frame()  # Aligned color frame
    depth_intrin = aligned_depth_frame.profile.as_video_stream_profile().intrinsics  # Depth intrinsics (e.g. principal point)
    color_intrin = aligned_color_frame.profile.as_video_stream_profile().intrinsics  # Color intrinsics
    img_color = np.asanyarray(aligned_color_frame.get_data())  # Color frame as a numpy array
    img_depth = np.asanyarray(aligned_depth_frame.get_data())  # Depth frame as a numpy array
    # Downscale the color image to a smaller resolution
    resized_color = cv2.resize(img_color, (320, 240))
    # Return the intrinsics, the color image, the depth image, the depth frame and the resized color image
    return color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame, resized_color

def get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin):
    # Split the input pixel coordinate into its x and y parts
    x = depth_pixel[0]
    y = depth_pixel[1]
    # Make sure the pixel coordinate lies inside the frame
    if x < 0 or x >= aligned_depth_frame.width or y < 0 or y >= aligned_depth_frame.height:
        print("Invalid pixel coordinate.")
        return 0, (0, 0, 0)
    # Use the RealSense SDK get_distance to read the depth at that pixel
    dis = aligned_depth_frame.get_distance(x, y)
    # Use rs2_deproject_pixel_to_point to map the 2D pixel to a 3D point in the camera frame
    camera_coordinate = rs.rs2_deproject_pixel_to_point(depth_intrin, depth_pixel, dis)
    return dis, camera_coordinate

def mouse_callback(event, x, y, flags, param):
    if event == cv2.EVENT_LBUTTONDOWN:
        print("Mouse click at (x, y):", x, y)
        depth_pixel = [x, y]
        dis, camera_coordinate = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin)
        print('Depth: ', dis)
        # print('camera_coordinate: ', camera_coordinate)

# Initialize the frame rate and the YOLO model
fps = 0.0
yolo = YOLO()
yolo.confidence = 0.3
frame_skip = 5
skip_counter = 0
# Main loop
if __name__ == "__main__":  # Run only when executed as the main program
    while True:  # Keep running until the user quits
        start_time = time.time()  # Record the start time of this iteration
        t1 = time.time()
        color_intrin, depth_intrin, img_color, img_depth, aligned_depth_frame, resized_color = get_aligned_images()  # Get the aligned images and the intrinsics
        frame = cv2.cvtColor(img_color, cv2.COLOR_BGR2RGB)  # Convert the BGR image to RGB
        frame = Image.fromarray(np.uint8(frame))  # Convert the numpy array to a PIL Image
        image, out_boxes = yolo.detect_image(frame)  # Run YOLO detection; returns the drawn image and the bounding boxes
        frame = np.array(image)  # Convert the PIL Image back to a numpy array
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        # Iterate over all detected bounding boxes;
        # before the loop, initialize the variables that track the nearest and farthest depths
        min_depth_val = float('inf')
        max_depth_val = float('-inf')
        min_depth_point = None
        max_depth_point = None
        center_points = []
        for top, left, bottom, right in out_boxes:
            min_depth = float('inf')   # Minimum depth, initialized to infinity
            max_depth = float('-inf')  # Maximum depth, initialized to negative infinity
            min_depth_pixel = None     # Pixel with the minimum depth
            max_depth_pixel = None     # Pixel with the maximum depth
            # Iterate over every pixel inside the bounding box
            for y in range(int(top), int(bottom)):
                for x in range(int(left), int(right)):
                    depth_pixel = [x, y]  # Pixel coordinate
                    dis, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin)  # Depth at this pixel
                    # Update the minimum and maximum depth
                    if dis > 0:
                        if dis < min_depth:
                            min_depth = dis
                            min_depth_pixel = depth_pixel
                        if dis > max_depth:
                            max_depth = dis
                            max_depth_pixel = depth_pixel
            if min_depth_pixel is not None:
                min_depth_val = min_depth
                min_depth_point = min_depth_pixel
            if max_depth_pixel is not None:
                max_depth_val = max_depth
                max_depth_point = max_depth_pixel
            # Compute the center coordinates of the bounding box
            center_x = int((left + right) / 2)
            center_y = int((top + bottom) / 2)
            depth_pixel = [center_x, center_y]
            center_depth, _ = get_3d_camera_coordinate(depth_pixel, aligned_depth_frame, depth_intrin)  # Depth at the center point
            # Print the minimum, maximum and center depth information
            print(f"Minimum depth at: {min_depth_pixel}, value: {min_depth}")
            print(f"Maximum depth at: {max_depth_pixel}, value: {max_depth}")
            print(f"Center depth at: {depth_pixel}, value: {center_depth}")
            # Mark the center point and the min/max depth points on the frame
            cv2.circle(frame, (center_x, center_y), 5, GREEN_COLOR, -1)
            if min_depth_point:
                cv2.circle(frame, (min_depth_point[0], min_depth_point[1]), 5, RED_COLOR, -1)
            if max_depth_point:
                cv2.circle(frame, (max_depth_point[0], max_depth_point[1]), 5, YELLOW_COLOR, -1)
            center_points.append((center_x, center_y, center_depth))
        # Left/right relation detection between every pair of bounding boxes (1-based indices for the text)
        bottom_half[:, :, :] = 0  # Clear last frame's relation text
        for i in range(1, len(out_boxes) + 1):          # from 1 up to len(out_boxes)
            for j in range(i + 1, len(out_boxes) + 1):  # from i+1 up to len(out_boxes)
                center_x1 = (out_boxes[i - 1][1] + out_boxes[i - 1][3]) / 2
                center_x2 = (out_boxes[j - 1][1] + out_boxes[j - 1][3]) / 2
                # Threshold (in pixels) that decides when two boxes count as left/right of each other
                threshold = 20  # adjust to your scene
                if center_x1 < center_x2 - threshold:
                    relation = "left"
                elif center_x1 > center_x2 + threshold:
                    relation = "right"
                else:
                    relation = "overlap"
                # Write the left/right relation into the lower part of the right half
                text = f"Bounding Box {i} is on the {relation} side of Bounding Box {j}"
                line_height = 30  # height of one text line
                y_position = 50 + ((i - 1) * (len(out_boxes)) + j - 1) * line_height
                cv2.putText(bottom_half, text, (50, y_position), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
        # If at least two boxes were detected, compute the distance between the first two center points
        if len(out_boxes) >= 2:
            center_x1, center_y1, center_depth1 = center_points[0]
            center_x2, center_y2, center_depth2 = center_points[1]
            # Distance between the two center points
            distance_x = center_x2 - center_x1
            distance_y = center_y2 - center_y1
            print(f"Center points distance (x, y): ({distance_x}, {distance_y})")
        cv2.setMouseCallback('RealSense', mouse_callback)  # Register the mouse callback to handle click events
        # Draw the FPS on the frame
        frame = cv2.putText(frame, "fps= %.2f" % (fps), (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        # Compute and update the FPS (running average)
        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %.2f" % (fps))  # Print the FPS
        # Compose the canvas: detection image on the left, object labels upper right, relations lower right
        frame = cv2.resize(frame, (left_half.shape[1], left_half.shape[0]))  # Resize frame to fit the left half
        left_half[:, :, :] = frame
        top_half[:, :, :] = 0  # Clear last frame's labels
        for i, (top, left, bottom, right) in enumerate(out_boxes):
            object_class = yolo.class_names[i]  # Object label (indexed by detection order here; your YOLO wrapper may return class ids instead)
            text = f"Object {i + 1}: {object_class}"
            # Show the label on the left (color image) half and in the upper part of the right half
            cv2.putText(left_half, text, (50, 50 + i * 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
            cv2.putText(top_half, text, (50, 50 + i * 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
        cv2.imshow('RealSense', canvas)  # Show the composed canvas
        key = cv2.waitKey(1) & 0xff  # Read the key pressed by the user
        # Compute and print the elapsed time of this iteration
        elapsed_time = time.time() - start_time
        print("Time: %.2f seconds" % elapsed_time)
        # Quit when the user presses 'q'
        if key == ord('q'):
            break
        # Pause until another key is pressed when the user presses 's'
        if key == ord('s'):
            cv2.waitKey(0)
    pipeline.stop()
    cv2.destroyAllWindows()
```
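The left/right test in the last block compares the boxes' center x-coordinates against a fixed pixel threshold. Pulled out as a small reusable helper (a sketch only; `horizontal_relation` is an illustrative name and assumes the same `(top, left, bottom, right)` box format):
```python=
def horizontal_relation(box_a, box_b, threshold=20):
    """Return 'left', 'right' or 'overlap' describing where box_a sits relative to box_b,
    based on the horizontal distance between the box centers (in pixels)."""
    center_a = (box_a[1] + box_a[3]) / 2  # (top, left, bottom, right) -> mean of left and right
    center_b = (box_b[1] + box_b[3]) / 2
    if center_a < center_b - threshold:
        return "left"
    if center_a > center_b + threshold:
        return "right"
    return "overlap"

# Example: compare every pair of detected boxes once
# for i in range(len(out_boxes)):
#     for j in range(i + 1, len(out_boxes)):
#         print(f"Box {i + 1} is on the {horizontal_relation(out_boxes[i], out_boxes[j])} side of Box {j + 1}")
```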