-
Notifications
You must be signed in to change notification settings - Fork 155
Description
I want to know: what is the normal inference speed of the .pt model? Even when I run it in FP16, I can only get it up to 9.2 frames per second.
My inference source code is as follows:
`import os
import time
import glob
from pathlib import Path
from ultralytics import YOLO
import cv2
from tqdm import tqdm
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
from pathlib import Path
class ImageDataset(Dataset):
    """Dataset over a collection of image paths; each item is one path."""

    def __init__(self, image_paths):
        # Keep the caller's collection as-is; no copy is made.
        self.image_paths = image_paths

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        # Only the stored path is returned; no image is read here.
        return self.image_paths[idx]
def collate_fn(batch):
    """Identity collate function: return the batch of samples unchanged.

    With ``ImageDataset`` the samples are image paths, so the result is the
    list of image paths for the batch.
    """
    return batch
def preprocess_image(image_path: str, imgsz: int = 800) -> torch.Tensor:
    """Read one image from disk and convert it to a normalized tensor.

    Args:
        image_path: Path of the image file to read.
        imgsz: Side length of the square output. The image is resized to
            ``(imgsz, imgsz)`` directly, so the aspect ratio is not preserved.

    Returns:
        A float32 tensor laid out as ``[3, H, W]`` in RGB channel order with
        pixel values divided by 255.

    Raises:
        ValueError: If OpenCV could not read or decode ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the offending path instead of an opaque OpenCV
        # assertion inside cvtColor.
        raise ValueError(f"Failed to read image: {image_path}")

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (imgsz, imgsz))
    img = img.astype(np.float32) / 255.0
    # HWC -> CHW
    return torch.from_numpy(img).permute(2, 0, 1)
def main():
    """Run batched YOLO inference over a directory of images.

    Collects the image files in the hard-coded test directory, runs them
    through the model in batches (on CUDA when available, otherwise CPU),
    saves one rendered result image per input under
    ``<output_dir>/inference_results`` and prints summary statistics.

    Returns early, after printing a message, when the weights file or the
    image directory does not exist or when no image file is found.

    Note:
        The reported total time and FPS are measured around the whole loop:
        image reading, preprocessing, the forward pass and saving the result
        images. They are end-to-end throughput, not forward-pass speed alone.
    """
    model_path = "./yolo_results_l/brid_train_l7/weights/best.pt"
    test_images_dir = "/root/autodl-fs/test_data/test_img1000"
    output_dir = "./output_results/l_train_800_2"
    os.makedirs(output_dir, exist_ok=True)

    if not os.path.exists(model_path):
        print(f"❌ 模型文件不存在: {model_path}")
        return
    if not os.path.exists(test_images_dir):
        print(f"❌ 测试图片目录不存在: {test_images_dir}")
        return

    image_extensions = ['jpg', 'jpeg', 'png', 'bmp', 'tiff', 'tif']
    found = set()
    for ext in image_extensions:
        # Match both lower- and upper-case extensions; the set removes any
        # path that is matched by both patterns.
        found.update(glob.glob(os.path.join(test_images_dir, f"*.{ext}")))
        found.update(glob.glob(os.path.join(test_images_dir, f"*.{ext.upper()}")))
    # Sorted so that the processing order is the same on every run.
    image_files = sorted(found)
    if not image_files:
        print(f"❌ 在目录 {test_images_dir} 中未找到图片文件")
        return
    print(f"📁 找到 {len(image_files)} 张图片")

    print(f"🤖 加载模型: {model_path}")
    # Load the model onto the GPU when one is available.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = YOLO(model_path).to(device)
    model.eval()
    print(f"✅ 模型加载完成,使用设备: {device}")

    # DataLoader
    # NOTE(review): ImageDataset yields path strings only, so the worker
    # processes do not decode any images; reading and preprocessing happen
    # serially in this process inside the loop below.
    dataset = ImageDataset(image_files)
    batch_size = 64  # tune to the available GPU memory
    num_workers = 8
    dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        collate_fn=collate_fn,
        pin_memory=True,
        persistent_workers=True,
    )
    print(f"📦 DataLoader配置: batch_size={batch_size}, num_workers={num_workers}")

    print("🚀 开始推理...")
    save_path = os.path.join(output_dir, "inference_results")
    start_time = time.time()
    success_count = 0
    error_count = 0
    pbar = tqdm(total=len(image_files), desc="推理进度", unit="张")
    try:
        # Create the output folder once instead of once per saved image.
        os.makedirs(save_path, exist_ok=True)
        total_batches = len(dataloader)

        for batch_idx, batch_paths in enumerate(dataloader):
            try:
                # Batch preprocessing
                imgs = [preprocess_image(p, imgsz=800) for p in batch_paths]
                imgs = torch.stack(imgs, dim=0).to(device, non_blocking=True)

                # Forward pass
                with torch.no_grad():
                    results = model(imgs)

                # Save each rendered result under the input's file name.
                for src_path, result in zip(batch_paths, results):
                    result.save(filename=os.path.join(save_path, Path(src_path).name))

                success_count += len(batch_paths)
                pbar.update(len(batch_paths))
                pbar.set_postfix({
                    'batch': f"{batch_idx+1}/{total_batches}",
                    'processed': success_count,
                    'batch_size': len(batch_paths),
                })
                # torch.cuda.empty_cache() is deliberately not called per
                # batch: it makes the caching allocator release and then
                # re-request its blocks on every iteration, which only slows
                # the loop down.
            except Exception as batch_e:
                # A failing batch is reported and counted; the run continues.
                print(f"❌ 批次 {batch_idx+1} 推理失败: {batch_e}")
                error_count += len(batch_paths)
                pbar.update(len(batch_paths))

        pbar.close()
        print(f"✅ 成功推理 {success_count} 张图片")
    except Exception as e:
        # Failure outside a single batch (e.g. while iterating the loader):
        # everything not yet processed successfully counts as failed.
        pbar.close()
        print(f"❌ 推理过程中出现错误: {e}")
        error_count = len(image_files) - success_count

    total_time = time.time() - start_time
    print("\n" + "=" * 60)
    print("📊 推理统计信息 (GPU批量)")
    print("=" * 60)
    print(f"📁 测试图片目录: {test_images_dir}")
    print(f"🤖 使用模型: {model_path}")
    print(f"📦 DataLoader配置: batch_size={batch_size}, num_workers={num_workers}")
    print(f"📸 总图片数量: {len(image_files)}")
    print(f"✅ 成功推理: {success_count}")
    print(f"❌ 失败数量: {error_count}")
    print(f"⏱️ 推理总耗时: {total_time:.2f} 秒")
    if success_count > 0:
        avg_time = total_time / success_count
        fps = success_count / total_time
        print(f"⚡ 平均每张: {avg_time:.3f} 秒")
        print(f"🎯 推理速度: {fps:.2f} FPS")
    print(f"💾 结果保存在: {output_dir}/inference_results")
    print("=" * 60)
# Run the inference pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
`