Is an inference speed of only 7.5 frames per second (FPS) normal? It feels too slow to me. What inference speed is everyone else getting?

<img width="1157" height="235" alt="Image" src="https://github.com/user-attachments/assets/658e7459-8d8d-463e-bda7-71dd2cbd3f19" />
I would like to know what the normal inference speed of the .pt model is. Even when I run it in FP16, I can only raise the speed to 9.2 FPS.


My inference source code is as follows:
`import os
import time
import glob
from pathlib import Path
from ultralytics import YOLO
import cv2
from tqdm import tqdm
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
from pathlib import Path


class ImageDataset(Dataset):
    """Dataset that wraps a sequence of image file paths.

    Each item is the stored path itself; no image is opened or decoded here.
    """

    def __init__(self, image_paths):
        # Keep a reference to the caller's sequence (no copy is made).
        self.image_paths = image_paths

    def __len__(self):
        """Return the number of stored paths."""
        paths = self.image_paths
        return len(paths)

    def __getitem__(self, idx):
        """Return the path stored at position ``idx``."""
        return self.image_paths[idx]


def collate_fn(batch):
    """Identity collate function: return the batch exactly as received.

    Used so that a batch stays a plain list of the dataset's items (image
    paths, for ``ImageDataset``) instead of being merged by default collation.
    """
    return batch


def preprocess_image(image_path: str, imgsz: int = 800) -> torch.Tensor:
    """Read one image from disk and convert it to a CHW float tensor.

    The image is loaded with OpenCV, converted from BGR to RGB, resized to
    ``imgsz`` x ``imgsz`` with a plain resize (the aspect ratio is NOT
    preserved, there is no letterboxing), divided by 255 and reordered from
    HWC to CHW.

    Args:
        image_path: Path of the image file to read.
        imgsz: Side length, in pixels, of the square output image.

    Returns:
        A ``float32`` tensor of shape ``[3, imgsz, imgsz]``.

    Raises:
        ValueError: If OpenCV cannot read or decode ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports a missing/corrupt file by returning None rather
        # than raising; fail here with a message that names the file instead
        # of letting cvtColor raise an opaque assertion error.
        raise ValueError(f"Failed to read image: {image_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (imgsz, imgsz))
    img = img.astype(np.float32) / 255.0
    img = torch.from_numpy(img).permute(2, 0, 1)  # HWC -> [3,H,W]
    return img


def main():
    """Run batched YOLOv13 inference over an image directory and print statistics.

    Steps:
      1. Validate the hard-coded model and image-directory paths.
      2. Collect image files by extension (lower- and upper-case variants).
      3. Load the model onto CUDA when available, otherwise CPU.
      4. Iterate over batches of paths, preprocess them with
         ``preprocess_image``, call the model and save one annotated image per
         input under ``<output_dir>/inference_results``.
      5. Print totals, the end-to-end FPS and, separately, the FPS of the
         model call alone.

    The end-to-end figure includes image decoding, resizing and writing the
    annotated results to disk, so it is expected to be much lower than the
    speed of the model call itself; both numbers are reported to make that
    difference visible.

    A failing batch is reported and counted in ``error_count``; processing
    then continues with the next batch.

    Returns:
        None. Returns early (after printing a message) when the model file or
        image directory is missing, or when no images are found.
    """
    model_path = "./yolo_results_l/brid_train_l7/weights/best.pt"
    test_images_dir = "/root/autodl-fs/test_data/test_img1000"
    output_dir = "./output_results/l_train_800_2"

    os.makedirs(output_dir, exist_ok=True)

    if not os.path.exists(model_path):
        print(f"❌ 模型文件不存在: {model_path}")
        return
    if not os.path.exists(test_images_dir):
        print(f"❌ 测试图片目录不存在: {test_images_dir}")
        return

    image_extensions = ['jpg', 'jpeg', 'png', 'bmp', 'tiff', 'tif']
    image_files = []
    for ext in image_extensions:
        image_files.extend(glob.glob(os.path.join(test_images_dir, f"*.{ext}")))
        image_files.extend(glob.glob(os.path.join(test_images_dir, f"*.{ext.upper()}")))
    # The set removes duplicates (both patterns can match the same file on a
    # case-insensitive filesystem); sorting makes the processing order
    # reproducible from run to run.
    image_files = sorted(set(image_files))

    if not image_files:
        print(f"❌ 在目录 {test_images_dir} 中未找到图片文件")
        return

    print(f"📁 找到 {len(image_files)} 张图片")
    print(f"🤖 加载模型: {model_path}")

    # Load the model onto the GPU when one is available.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = YOLO(model_path).to(device)
    model.eval()
    print(f"✅ 模型加载完成，使用设备: {device}")
    use_cuda = device.type == "cuda"

    # DataLoader
    # NOTE: the dataset only yields path strings, so the workers do not
    # parallelize image decoding; decoding happens in this process below.
    dataset = ImageDataset(image_files)
    batch_size = 64  # tune to the available GPU memory
    num_workers = 8
    dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        collate_fn=collate_fn,
        pin_memory=True,
        persistent_workers=True
    )

    print(f"📦 DataLoader配置: batch_size={batch_size}, num_workers={num_workers}")
    print("🚀 开始推理...")

    # Loop-invariant: create the results directory once instead of per image.
    save_path = os.path.join(output_dir, "inference_results")
    os.makedirs(save_path, exist_ok=True)

    start_time = time.time()
    success_count = 0
    error_count = 0
    # Time and image count of the model call alone (no disk I/O, no saving).
    infer_time = 0.0
    infer_images = 0

    pbar = tqdm(total=len(image_files), desc="推理进度", unit="张")

    try:
        for batch_idx, batch_paths in enumerate(dataloader):
            try:
                # Preprocess the whole batch on the CPU, then move it to the device.
                imgs = [preprocess_image(p, imgsz=800) for p in batch_paths]
                imgs = torch.stack(imgs, dim=0).to(device, non_blocking=True)

                # CUDA work is asynchronous: synchronize before and after the
                # call so the measured interval covers exactly this batch.
                if use_cuda:
                    torch.cuda.synchronize()
                forward_start = time.perf_counter()
                with torch.no_grad():
                    results = model(imgs)
                if use_cuda:
                    torch.cuda.synchronize()
                infer_time += time.perf_counter() - forward_start
                infer_images += len(batch_paths)

                # Save one annotated image per input, keeping the input file name.
                for src_path, r in zip(batch_paths, results):
                    r.save(filename=os.path.join(save_path, Path(src_path).name))

                success_count += len(batch_paths)
                pbar.update(len(batch_paths))
                pbar.set_postfix({
                    'batch': f"{batch_idx+1}/{len(dataloader)}",
                    'processed': success_count,
                    'batch_size': len(batch_paths)
                })

                # No per-batch torch.cuda.empty_cache(): it hands cached blocks
                # back to the driver, forcing fresh allocations on the next
                # batch, and does not give PyTorch any additional memory.

            except Exception as batch_e:
                print(f"❌ 批次 {batch_idx+1} 推理失败: {str(batch_e)}")
                error_count += len(batch_paths)
                pbar.update(len(batch_paths))

        pbar.close()
        print(f"✅ 成功推理 {success_count} 张图片")

    except Exception as e:
        # Failure outside a single batch (e.g. in the DataLoader itself):
        # everything not yet processed is counted as failed.
        pbar.close()
        print(f"❌ 推理过程中出现错误: {str(e)}")
        error_count = len(image_files) - success_count

    total_time = time.time() - start_time

    print("\n" + "=" * 60)
    print("📊 推理统计信息 (GPU批量)")
    print("=" * 60)
    print(f"📁 测试图片目录: {test_images_dir}")
    print(f"🤖 使用模型: {model_path}")
    print(f"📦 DataLoader配置: batch_size={batch_size}, num_workers={num_workers}")
    print(f"📸 总图片数量: {len(image_files)}")
    print(f"✅ 成功推理: {success_count}")
    print(f"❌ 失败数量: {error_count}")
    print(f"⏱️  推理总耗时: {total_time:.2f} 秒")

    if success_count > 0:
        # End-to-end numbers: decoding + preprocessing + model call + saving.
        avg_time = total_time / success_count
        fps = success_count / total_time
        print(f"⚡ 平均每张: {avg_time:.3f} 秒")
        print(f"🎯 推理速度: {fps:.2f} FPS")

    if infer_images > 0 and infer_time > 0:
        # Model call only, which is the figure comparable to published speeds.
        print(f"🧠 仅模型调用: {infer_time:.2f} 秒, {infer_images / infer_time:.2f} FPS (不含读图/预处理/保存)")

    print(f"💾 结果保存在: {output_dir}/inference_results")
    print("=" * 60)


# Run the inference benchmark only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
`

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

May I ask if the reasoning speed is only 7.5 frames per second? Is this speed normal? I feel it's too slow. What's your reasoning speed, everyone? #60

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

May I ask if the reasoning speed is only 7.5 frames per second? Is this speed normal? I feel it's too slow. What's your reasoning speed, everyone? #60

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions