Skip to content

Is an inference speed of only 7.5 frames per second normal? It feels too slow to me. What inference speed is everyone else getting? #60

@Wei-JL

Description

@Wei-JL
[Image] I would like to know what the normal inference speed of the .pt model is. Even when I run it in FP16, I can only get it up to 9.2 frames per second.

My inference source code is as follows:
`import os
import time
import glob
from pathlib import Path
from ultralytics import YOLO
import cv2
from tqdm import tqdm
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
from pathlib import Path

class ImageDataset(Dataset):
    """Dataset over a list of image file paths.

    Each item is the path itself; no image is read or decoded by this class.
    """

    def __init__(self, image_paths):
        """Store the sequence of image paths to serve."""
        self.image_paths = image_paths

    def __len__(self):
        """Return the number of image paths."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return the image path stored at position ``idx``."""
        return self.image_paths[idx]

def collate_fn(batch):
    """Return the batch unchanged, i.e. as the list of items it was given.

    Passed to the DataLoader so that a batch stays a plain list instead of
    going through the default collation.
    """
    return batch

def preprocess_image(image_path: str, imgsz: int = 800) -> torch.Tensor:
    """Read one image from disk and convert it to a model-ready tensor.

    The image is converted from BGR to RGB, resized to ``imgsz`` x ``imgsz``
    (aspect ratio is not preserved), scaled to the ``[0, 1]`` range as
    float32 and rearranged from HWC to CHW.

    Args:
        image_path: Path of the image file to load.
        imgsz: Side length, in pixels, of the square output image.

    Returns:
        A float32 tensor of shape ``[3, imgsz, imgsz]``.

    Raises:
        ValueError: If the file cannot be read or decoded as an image.
    """
    img = cv2.imread(image_path)
    # cv2.imread signals failure by returning None rather than raising; fail
    # here with a clear message instead of an opaque error inside cvtColor.
    if img is None:
        raise ValueError(f"Failed to read image: {image_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (imgsz, imgsz))
    img = img.astype(np.float32) / 255.0
    return torch.from_numpy(img).permute(2, 0, 1)  # HWC -> [3, H, W]

def main():
    """Run batched YOLOv13 inference on a directory of images using the GPU.

    Collects the image files in the test directory, loads the model, feeds
    the images through it in batches, writes one annotated result image per
    input into ``<output_dir>/inference_results`` and prints timing
    statistics.

    Returns early (after printing a message) when the model file or the
    image directory does not exist, or when no image files are found.

    NOTE: the reported time/FPS is end-to-end. It covers image decoding and
    resizing, the forward pass, and drawing/saving the result images, so it
    is lower than the pure model throughput.
    """
    model_path = "./yolo_results_l/brid_train_l7/weights/best.pt"
    test_images_dir = "/root/autodl-fs/test_data/test_img1000"
    output_dir = "./output_results/l_train_800_2"

    os.makedirs(output_dir, exist_ok=True)

    if not os.path.exists(model_path):
        print(f"❌ 模型文件不存在: {model_path}")
        return
    if not os.path.exists(test_images_dir):
        print(f"❌ 测试图片目录不存在: {test_images_dir}")
        return

    # Match every extension in lower and upper case. On case-insensitive
    # file systems both patterns hit the same files, hence the set; sorting
    # makes the processing order reproducible between runs.
    image_extensions = ['jpg', 'jpeg', 'png', 'bmp', 'tiff', 'tif']
    image_files = []
    for ext in image_extensions:
        image_files.extend(glob.glob(os.path.join(test_images_dir, f"*.{ext}")))
        image_files.extend(glob.glob(os.path.join(test_images_dir, f"*.{ext.upper()}")))
    image_files = sorted(set(image_files))

    if not image_files:
        print(f"❌ 在目录 {test_images_dir} 中未找到图片文件")
        return

    print(f"📁 找到 {len(image_files)} 张图片")
    print(f"🤖 加载模型: {model_path}")

    # Load the model onto the GPU when one is available.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = YOLO(model_path).to(device)
    # NOTE(review): confirm that eval() on the YOLO wrapper only switches to
    # evaluation mode in the installed ultralytics version.
    model.eval()
    print(f"✅ 模型加载完成,使用设备: {device}")

    # NOTE: ImageDataset yields path strings, so the DataLoader workers only
    # forward paths; decoding and resizing happen in this process below.
    dataset = ImageDataset(image_files)
    batch_size = 64  # tune to the available GPU memory
    num_workers = 8
    dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        collate_fn=collate_fn,
        pin_memory=True,
        persistent_workers=True
    )

    print(f"📦 DataLoader配置: batch_size={batch_size}, num_workers={num_workers}")
    print("🚀 开始推理...")

    # Create the results directory once instead of once per saved image.
    save_path = os.path.join(output_dir, "inference_results")
    os.makedirs(save_path, exist_ok=True)

    start_time = time.time()
    success_count = 0
    error_count = 0
    total_batches = len(dataloader)

    pbar = tqdm(total=len(image_files), desc="推理进度", unit="张")

    try:
        for batch_idx, batch_paths in enumerate(dataloader):
            try:
                # Decode and resize the whole batch, then move it to the device.
                imgs = [preprocess_image(p, imgsz=800) for p in batch_paths]
                imgs = torch.stack(imgs, dim=0).to(device, non_blocking=True)

                # Forward pass.
                with torch.no_grad():
                    results = model(imgs)

                # Save each annotated result under its source file name.
                for src_path, r in zip(batch_paths, results):
                    r.save(filename=os.path.join(save_path, Path(src_path).name))

                success_count += len(batch_paths)
                pbar.update(len(batch_paths))
                pbar.set_postfix({
                    'batch': f"{batch_idx+1}/{total_batches}",
                    'processed': success_count,
                    'batch_size': len(batch_paths)
                })

                # No torch.cuda.empty_cache() here: it does not give PyTorch
                # more usable memory, and releasing the cache every batch
                # forces the allocator to re-request it on the next one.

            except Exception as batch_e:
                # A failed batch is counted and skipped; the run continues.
                print(f"❌ 批次 {batch_idx+1} 推理失败: {batch_e}")
                error_count += len(batch_paths)
                pbar.update(len(batch_paths))

    except Exception as e:
        # Failure outside a single batch (e.g. while fetching from the
        # DataLoader): everything not yet processed is counted as failed.
        pbar.close()
        print(f"❌ 推理过程中出现错误: {e}")
        error_count = len(image_files) - success_count
    else:
        pbar.close()
        print(f"✅ 成功推理 {success_count} 张图片")

    total_time = time.time() - start_time

    print("\n" + "=" * 60)
    print("📊 推理统计信息 (GPU批量)")
    print("=" * 60)
    print(f"📁 测试图片目录: {test_images_dir}")
    print(f"🤖 使用模型: {model_path}")
    print(f"📦 DataLoader配置: batch_size={batch_size}, num_workers={num_workers}")
    print(f"📸 总图片数量: {len(image_files)}")
    print(f"✅ 成功推理: {success_count}")
    print(f"❌ 失败数量: {error_count}")
    print(f"⏱️  推理总耗时: {total_time:.2f} 秒")

    if success_count > 0:
        avg_time = total_time / success_count
        fps = success_count / total_time
        print(f"⚡ 平均每张: {avg_time:.3f} 秒")
        print(f"🎯 推理速度: {fps:.2f} FPS")

    print(f"💾 结果保存在: {output_dir}/inference_results")
    print("=" * 60)

# Run the inference pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
`

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions