@@ -3,6 +3,9 @@ sidebar_position: 3
title: OpenCV
---
55
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
8+
早期计算机处理的数据有2种:文本数据、图像数据。其中文本数据又称为序列数据,图像数据又称为网格数据。
710
计算机视觉(Computer Vision)是人工智能的一个重要分支,它研究如何让计算机“看”和“理解”图像和视频,也叫机器视觉(Machine Vision)。
@@ -11,7 +14,7 @@ title: OpenCV
1114
它向前承接传统以数组为单位的图像处理,向后允许你加载一些现成的算法、模型,是传统图像处理与深度学习之间重要的桥梁。
1316
14- 目前OpenCV主要用于<Highlight color = " g" >图像的预处理作为深度学习模型的输入</Highlight >、或者< Highlight color = " g " >用较低的资源运行深度学习模型</ Highlight > 。
目前 OpenCV 主要用于<Highlight color="g">图像的预处理,作为深度学习模型的输入</Highlight>。
1518
## 安装
1720
@@ -1803,6 +1806,194 @@ if __name__ == "__main__":
18031806
如果你不想从头写模型,直接使用 Ultralytics YOLO(v11)是目前最推荐的工业级方案。
18051808
1806- 训练好的 PyTorch / YOLO 模型都可以非常方便地导出为 ONNX 格式,并用 OpenCV DNN 模块加载推理。
:::tip
训练好的 PyTorch / YOLO 模型都可以导出为 ONNX 格式,并用 OpenCV DNN 模块加载推理。

经过测试:OpenCV 加载 YOLOv11 模型时,FPS 和内存占用与原生 YOLO 一致,但 OpenCV 的 CPU 占用率更高。

所以使用 OpenCV 的唯一原因是:可以用 `C++` 版本的代码,在没有 Python 依赖的设备上运行,适合资源受限的边缘设备。
:::
1816+
1817+ ### 导出模型
1818+
1819+ ``` python showLineNumbers
1820+ from ultralytics import YOLO
1821+ import os
1822+ import shutil
1823+
def export_yolo_to_onnx(model_path, output_path, imgsz=640, batch_size=1):
    """Export an Ultralytics YOLO checkpoint to ONNX and store it at ``output_path``.

    The graph is exported with a static input shape (``dynamic=False``), in
    FP32 (``half=False``), simplified, and with ONNX opset 11.

    Args:
        model_path: Weights file (or model name) passed to ``YOLO(...)``.
        output_path: Where the resulting ``.onnx`` file should end up.
        imgsz: Input image size passed to the exporter.
        batch_size: Batch size passed to the exporter.

    Returns:
        ``output_path``, once the exported file is in place.

    Raises:
        FileNotFoundError: If no exported file can be found after the export.
    """
    model = YOLO(model_path)
    exported = model.export(
        format='onnx',
        imgsz=imgsz,
        batch=batch_size,
        simplify=True,
        opset=11,
        dynamic=False,
        half=False,
    )

    # Prefer the location reported by the exporter; fall back to the
    # "<weights path without extension>.onnx" guess when nothing is returned.
    if exported:
        auto_path = str(exported)
    else:
        auto_path = os.path.splitext(model_path)[0] + '.onnx'

    if not os.path.exists(auto_path):
        raise FileNotFoundError(f"ONNX export did not produce a file: {auto_path}")

    # Move the file to the requested location. Compare absolute paths so that
    # "model.onnx" and "./model.onnx" are recognised as the same file.
    target = os.path.abspath(output_path)
    if os.path.abspath(auto_path) != target:
        os.makedirs(os.path.dirname(target), exist_ok=True)
        shutil.move(auto_path, output_path)

    print(f"导出完成: {output_path}")
    return output_path
1843+
1844+
if __name__ == "__main__":
    # Export the official yolo11n.pt checkpoint to ONNX.
    export_yolo_to_onnx("yolo11n.pt", "yolo11n.onnx", imgsz=640, batch_size=1)
1848+ ```
1849+
1850+ ### 使用模型
1851+
<Tabs>
<TabItem value="ultralytics" label="Ultralytics" default>
1854+
1855+ ``` python showLineNumbers
from ultralytics import YOLO
import cv2
import time

# Load the YOLO model from its native .pt weights ("yolo11n.onnx" also works).
model = YOLO("yolo11n.pt")

# Open the default camera (device index 0).
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    print("无法打开摄像头!")
    # Exit with a non-zero status; the bare exit() helper is only installed
    # by the `site` module and is not guaranteed to exist.
    raise SystemExit(1)

print("摄像头已打开,按 'q' 键退出")

# Timestamp of the previous frame. Starting from "now" (not 0) keeps the
# first FPS reading meaningful.
pTime = time.time()

try:
    # Real-time detection loop.
    while True:
        ret, frame = cap.read()
        if not ret:
            print("无法读取摄像头画面")
            break

        # Native YOLO inference; pre- and post-processing are handled by the library.
        results = model(frame, verbose=False)

        # Draw the detections onto a copy of the frame.
        annotated_frame = results[0].plot()

        # FPS of the whole loop iteration (capture + inference + drawing),
        # so the number is comparable with other pipelines.
        cTime = time.time()
        fps = 1.0 / (cTime - pTime) if (cTime - pTime) > 0 else 0
        pTime = cTime

        # Overlay the loop FPS and the model-reported inference time.
        inference_time = results[0].speed['inference']  # ms
        cv2.putText(annotated_frame, f"FPS: {fps:.1f} (Loop)", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
        cv2.putText(annotated_frame, f"Inference: {inference_time:.1f}ms", (10, 60),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 0), 2)

        # Show the annotated frame.
        cv2.imshow("YOLO 实时检测", annotated_frame)

        # Quit when 'q' is pressed.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the camera and close the window, even after an exception.
    cap.release()
    cv2.destroyAllWindows()
    print("已关闭摄像头")
1912+ ```
1913+
</TabItem>
<TabItem value="opencv" label="OpenCV">

1916+ ``` python showLineNumbers
import cv2
import numpy as np
import time

# Configuration
ONNX_PATH = "yolo11n.onnx"
CONF_THRESHOLD = 0.45
NMS_THRESHOLD = 0.5
INPUT_SIZE = 640
USE_CUDA = True

# COCO class names, indexed by class id.
COCO_CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
                'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
                'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
                'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
                'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
                'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
                'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
                'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
                'hair drier', 'toothbrush']

# Load the model; switch to the CUDA backend only when a CUDA device is reported.
net = cv2.dnn.readNetFromONNX(ONNX_PATH)
if USE_CUDA and cv2.cuda.getCudaEnabledDeviceCount() > 0:
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA_FP16)

# Open the default camera (device index 0) and fail early if it is unavailable.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("无法打开摄像头!")
    raise SystemExit(1)

# Timestamp of the previous frame. Starting from "now" (not 0) keeps the
# first FPS reading meaningful.
pTime = time.time()

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Inference: resize to the network input size, scale pixels by 1/255, swap BGR -> RGB.
    t0 = time.time()
    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (INPUT_SIZE, INPUT_SIZE), swapRB=True)
    net.setInput(blob)
    # NOTE(review): assumes the export yields (1, 4 + num_classes, num_candidates),
    # so after squeeze + transpose each row is one candidate box — confirm for custom models.
    output = np.squeeze(net.forward()).T
    # Stop the timer here so the figure covers blob creation + forward pass only,
    # not post-processing or drawing.
    inference_ms = (time.time() - t0) * 1000

    # Post-processing: split box coordinates from per-class scores and keep
    # candidates whose best class score passes the confidence threshold.
    boxes = output[:, :4]
    scores = output[:, 4:4 + len(COCO_CLASSES)]
    max_scores = np.max(scores, axis=1)
    class_ids = np.argmax(scores, axis=1)
    mask = max_scores > CONF_THRESHOLD

    # Convert (cx, cy, w, h) in network-input pixels to (x, y, w, h) in frame pixels.
    h, w = frame.shape[:2]
    scale_x, scale_y = w / INPUT_SIZE, h / INPUT_SIZE
    boxes_masked = boxes[mask]
    final_boxes = [[int((cx - bw / 2) * scale_x), int((cy - bh / 2) * scale_y),
                    int(bw * scale_x), int(bh * scale_y)]
                   for cx, cy, bw, bh in boxes_masked]

    # Non-maximum suppression, then draw the surviving boxes.
    scores_masked = max_scores[mask]
    class_ids_masked = class_ids[mask]
    indices = cv2.dnn.NMSBoxes(final_boxes, scores_masked.tolist(), CONF_THRESHOLD, NMS_THRESHOLD)
    # NMSBoxes may return an ndarray or an (empty) tuple depending on the
    # OpenCV version; normalise to a flat integer array.
    for i in np.asarray(indices, dtype=int).reshape(-1):
        x, y, bw, bh = final_boxes[i]
        label = f"{COCO_CLASSES[class_ids_masked[i]]} {scores_masked[i]:.2f}"
        cv2.rectangle(frame, (x, y), (x + bw, y + bh), (0, 255, 0), 2)
        cv2.putText(frame, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    # FPS of the whole loop iteration.
    cTime = time.time()
    fps = 1 / (cTime - pTime) if (cTime - pTime) > 0 else 0
    pTime = cTime
    cv2.putText(frame, f"FPS: {fps:.1f}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
    cv2.putText(frame, f"Inference: {inference_ms:.1f}ms", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 0), 2)

    cv2.imshow("YOLO Webcam", frame)
    # Quit on ESC; mask to the low byte because some platforms set higher bits.
    if cv2.waitKey(1) & 0xFF == 27:
        break
18071994
1808- 使用 OpenCV 依赖极轻、CPU 性能尚可,适合资源受限的边缘设备。
# Release the camera and close the preview window.
cap.release()
cv2.destroyAllWindows()
```

</TabItem>
</Tabs>
0 commit comments