研究了一阵子的yolov5算法,个人是利用CPU跑算法,训练速度很慢,效果也一般,需要自己训练yolov算法,并将算法封装,提供API接口,供其调用。环境配置和代码封装见文末。 import math import cv2 import detect_with_API import torch from openni import openni2 import numpy as np def depth_img(depth_dpt, x1, y1, x3, y3): PointCloudData1 = [] PointCloudData3 = [] # 相机内参 fx = 579.446 fy = 579.1467 cx = 318.1348 cy = 239.8945 # 对每个点的像素深度值进行处理,获取的是平面数据,再根据平面坐标xy确定z的数据,Z值有误差,需要用陀螺仪校准,特征会漂移 # print('x1= ', x1) # print('y1= ', y1) for x in range(0, len(depth_dpt), 1): # np数组中获取高度值, 遍历每一列 每个高度里面有多少个像素点 for y in range(0, len(depth_dpt[x])): # 遍历每一行 if (depth_dpt[x][y] != 0): # 判定每个像素点的深度值 if x in y1 and y in x1: for i in range(len(x1)): if x == y1[i] and y == x1[i]: # PointCloudData_2.append([y, x, depth_dpt[x, y]]) # 存储 y 、 x 、z坐标 x_point = float((y - cx) * (depth_dpt[x, y])) / fx y_point = float((x - cy) * (depth_dpt[x, y])) / fy # PointCloudData.append(f'{x_point} {y_point} {depth_dpt[x][y]}\n') PointCloudData1.append([x_point, y_point, depth_dpt[x, y]]) # 存储 y 、 x 、z坐标 elif x in y3 and y in x3: for i in range(len(x3)): if x == y3[i] and y == x3[i]: # PointCloudData_2.append([y, x, depth_dpt[x, y]]) # 存储 y 、 x 、z坐标 x_point = float((y - cx) * (depth_dpt[x, y])) / fx y_point = float((x - cy) * (depth_dpt[x, y])) / fy # PointCloudData.append(f'{x_point} {y_point} {depth_dpt[x][y]}\n') PointCloudData3.append([x_point, y_point, depth_dpt[x, y]]) # 存储 y 、 x 、z坐标 cv2.calibrateHandEye() cv2.solvePnP() print('特征点point1真实三维坐标:', PointCloudData1) print('特征点point3真实三维坐标:', PointCloudData3) print() # 将每一帧的结果输出分开 angle = (PointCloudData1[0][0] - PointCloudData3[0][0]) / math.sqrt(math.pow((PointCloudData1[0][0] - PointCloudData3[0][0]),2)+math.pow((PointCloudData1[0][2] - PointCloudData3[0][2]),2)) # print('y坐标:', PointCloudData_2[1]) # print('z坐标:', PointCloudData_2[2]) # with open('data.txt', 'w') as F: # F.writelines(PointCloudData) return angle if __name__ == '__main__': cap = cv2.VideoCapture(0) a = detect_with_API.DetectAPI(weights='runs/train/exp5/weights/best.pt') with torch.no_grad(): rec, img = cap.read() result, names = a.detect([img]) img = result[0][0] # 每一帧图片的处理结果图片 # 每一帧图像的识别结果(可包含多个物体) x_point1 = [] y_point1 = [] x_point2 = [] y_point2 = [] x_point3 = [] y_point3 = [] for cls, (x1, y1, x2, y2), conf in result[0][1]: # print(names[cls], x1, y1, x2, y2, conf) # 识别物体种类、左上角x坐标、左上角y轴坐标、右下角x轴坐标、右下角y轴坐标,置信度 x0 = int(x1 + (x2 - x1) / 2) y0 = int(y1 + (y2 - y1) / 2) if names[cls] == 'point1': x_point1.append(x0) y_point1.append(y0) if names[cls] == 'point3': x_point2.append(x0) y_point2.append(y0) ''' # cv2.rectangle(img,(x1,y1),(x2,y2),(0,255,0)) # cv2.putText(img,names[cls],(x1,y1-20),cv2.FONT_HERSHEY_DUPLEX,1.5,(255,0,0))''' cap.release() cv2.destroyAllWindows() openni2.initialize() dev = openni2.Device.open_any() print(dev.get_device_info()) depth_stream = dev.create_depth_stream() depth_stream.start() frame = depth_stream.read_frame() dframe_data = np.array(frame.get_buffer_as_triplet()).reshape([480, 640, 2]) dpt1 = np.asarray(dframe_data[:, :, 0], dtype='float32') dpt2 = np.asarray(dframe_data[:, :, 1], dtype='float32') dpt2 *= 255 dpt = dpt1 + dpt2 dpt = dpt[:, ::-1] angle = depth_img(dpt, x_point1, y_point1, x_point3, y_point3) depth_stream.stop() dev.close()
yolov5环境配置和代码封装都有很详细的教程,下面提供参考链接:
yolov5环境配置:在CPU上跑yolov5(详细步骤+适合入门)_yolo 怎么跑起来-CSDN博客
【Yolov5】1.认真总结6000字Yolov5保姆级教程(2022.06.28全新版本v6.1)_yolov5教程-CSDN博客
yolov5代码封装:魔改并封装 YoloV5 Version7 的 detect.py 成 API接口以供 python 程序使用_yolov5部署成api接口-CSDN博客