How to configure and train a yolov8n model with a pyramid enhancement block (Pre module to the yolov8) #23681

Mahanthvadlamoodi · 2026-02-17T20:54:35Z

Mahanthvadlamoodi
Feb 17, 2026

I have a Pyramid Enhancement Network (PENet), following the PE-YOLO paper, which mainly helps the detector in low-light scenarios. I am trying to use it with the yolov8n model (about 3M params); PENet itself is around 90K params.
However, I am having trouble attaching PENet to the yolov8n model. PENet doesn't have a separate loss function; it should be trained jointly using only the YOLO loss, since this is recognition-driven enhancement.
below is the PENet code
``

# PENet: Pyramid Enhancement Network
import torch
import torch.nn as nn
import torch.nn.functional as F

# -----------------------
# Laplacian Pyramid utils
# -----------------------

class Lap_Pyramid_Conv(nn.Module):
    """Laplacian pyramid decomposition and reconstruction with a fixed Gaussian blur.

    Args:
        num_high: Number of high-frequency (residual) levels produced by
            ``pyramid_decom``.
        kernel_size: Side length of the square Gaussian kernel.
        channels: Number of image channels; the blur is applied depthwise.
    """

    def __init__(self, num_high=3, kernel_size=5, channels=3):
        super().__init__()
        self.num_high = num_high
        # Fixed, non-trainable kernel. Non-persistent, so it is rebuilt on
        # construction instead of being stored in checkpoints.
        self.register_buffer("kernel", self.gauss_kernel(kernel_size, channels), persistent=False)

    def gauss_kernel(self, kernel_size, channels):
        """Return a normalized unit-variance Gaussian kernel of shape (channels, 1, k, k)."""
        base = torch.arange(kernel_size, dtype=torch.float32)
        gauss_1d = torch.exp(-0.5 * ((base - (kernel_size - 1) / 2) ** 2))
        gauss_1d = gauss_1d / gauss_1d.sum()
        kernel = gauss_1d[:, None] @ gauss_1d[None, :]
        return kernel.unsqueeze(0).unsqueeze(0).repeat(channels, 1, 1, 1)

    def conv_gauss(self, x):
        """Blur ``x`` depthwise with the Gaussian kernel, keeping its spatial size."""
        # Match the input's device/dtype so the buffer also works under half precision.
        kernel = self.kernel.to(x.device).to(x.dtype)
        n_channels = kernel.shape[0]
        pad = kernel.shape[-1] // 2
        x = F.pad(x, (pad, pad, pad, pad), mode="reflect")
        return F.conv2d(x, kernel, groups=n_channels)

    def downsample(self, x):
        """Keep every second row and column."""
        return x[:, :, ::2, ::2]

    def upsample(self, x):
        """Double the spatial size by zero insertion followed by a Gaussian blur."""
        up = x.new_zeros((x.size(0), x.size(1), x.size(2) * 2, x.size(3) * 2))
        # Only one in four output pixels is non-zero, so scale by 4 to keep
        # the overall intensity unchanged after blurring.
        up[:, :, ::2, ::2] = x * 4.0
        return self.conv_gauss(up)

    def pyramid_decom(self, img):
        """Split ``img`` into ``num_high`` residual levels plus a low-frequency base.

        Returns:
            A list ordered finest residual first, coarsest base image last.
        """
        current = img
        pyr = []
        for _ in range(self.num_high):
            down = self.downsample(self.conv_gauss(current))
            # For odd heights/widths the upsampled map is one pixel larger than
            # ``current``; crop it so the subtraction is well defined.
            up = self.upsample(down)[:, :, : current.size(2), : current.size(3)]
            pyr.append(current - up)
            current = down
        pyr.append(current)
        return pyr

    def pyramid_recons(self, pyr):
        """Rebuild an image from pyramid levels ordered coarsest first.

        Note that this is the reverse of the order returned by ``pyramid_decom``.
        """
        image = pyr[0]
        for level in pyr[1:]:
            # Crop for the same odd-size reason as in ``pyramid_decom``.
            up = self.upsample(image)[:, :, : level.size(2), : level.size(3)]
            image = up + level
        return image

# -----------------------
# Small enhancement blocks
# -----------------------

class ResidualBlock(nn.Module):
    """Two 3x3 convolutions with a skip connection, followed by an output projection.

    Args:
        in_features: Number of input channels.
        out_features: Number of output channels; defaults to ``in_features``.
    """

    def __init__(self, in_features, out_features=None):
        super().__init__()
        if out_features is None:
            out_features = in_features
        self.block = nn.Sequential(
            nn.Conv2d(in_features, in_features, 3, padding=1),
            nn.LeakyReLU(inplace=True),
            nn.Conv2d(in_features, in_features, 3, padding=1),
        )
        # Applied after the residual sum so the channel count may change.
        self.conv_out = nn.Conv2d(in_features, out_features, 3, padding=1)

    def forward(self, x):
        """Return ``conv_out(x + block(x))``; spatial size is preserved."""
        return self.conv_out(x + self.block(x))

class DPM(nn.Module):
    """Global-context block: attention-pooled context added back to every position.

    Args:
        inplanes: Number of input (and output) channels.
        planes: Hidden channel count of the context transform.
        act: Activation used inside the context transform. The default
            instance is created once and shared between DPM objects; it holds
            no parameters, so the sharing has no effect on training.
        bias: Whether the 1x1 convolutions use a bias term.
    """

    def __init__(self, inplanes, planes, act=nn.LeakyReLU(0.2, inplace=True), bias=False):
        super().__init__()
        self.conv_mask = nn.Conv2d(inplanes, 1, 1, bias=bias)
        self.softmax = nn.Softmax(dim=2)
        self.channel_add_conv = nn.Sequential(
            nn.Conv2d(inplanes, planes, 1, bias=bias),
            act,
            nn.Conv2d(planes, inplanes, 1, bias=bias),
        )

    def spatial_pool(self, x):
        """Return a (B, C, 1, 1) context vector: softmax-weighted sum over all positions."""
        B, C, H, W = x.size()
        # reshape (not view) so non-contiguous inputs are accepted as well.
        input_x = x.reshape(B, C, H * W).unsqueeze(1)  # (B, 1, C, HW)
        context_mask = self.softmax(self.conv_mask(x).reshape(B, 1, H * W)).unsqueeze(3)  # (B, 1, HW, 1)
        context = torch.matmul(input_x, context_mask).reshape(B, C, 1, 1)
        return context

    def forward(self, x):
        """Add the transformed global context to ``x`` (broadcast over H and W)."""
        return x + self.channel_add_conv(self.spatial_pool(x))

# -----------------------
# Sobel filter (AMP-safe)
# -----------------------

def sobel(img):
    """Return the per-channel Sobel gradient magnitude of ``img``.

    Args:
        img: Tensor indexed as (batch, channels, height, width).

    Returns:
        Tensor of the same shape and dtype as ``img``. Borders are computed
        with zero padding.
    """
    dtype, device, ch = img.dtype, img.device, img.shape[1]
    # Kernels are built in the input's dtype/device so the function works
    # unchanged for half-precision and GPU tensors.
    gx = torch.tensor(
        [[1, 0, -1],
         [2, 0, -2],
         [1, 0, -1]],
        dtype=dtype, device=device,
    ).view(1, 1, 3, 3)
    gy = torch.tensor(
        [[1, 2, 1],
         [0, 0, 0],
         [-1, -2, -1]],
        dtype=dtype, device=device,
    ).view(1, 1, 3, 3)
    # groups=ch applies the same 3x3 kernel to every channel independently.
    edge_x = F.conv2d(img, gx.repeat(ch, 1, 1, 1), padding=1, groups=ch)
    edge_y = F.conv2d(img, gy.repeat(ch, 1, 1, 1), padding=1, groups=ch)
    # The epsilon keeps the sqrt gradient finite where both responses are zero.
    return torch.sqrt(edge_x ** 2 + edge_y ** 2 + 1e-6)

class LowPassModule(nn.Module):
    """Pyramid-pooling low-pass filter applied to equal channel groups.

    The input channels are split into ``len(sizes)`` equal groups; group ``i``
    is average-pooled to ``sizes[i] x sizes[i]`` and bilinearly resized back
    to the input resolution.

    Args:
        in_channel: Number of input channels; must be divisible by ``len(sizes)``.
        sizes: Output size of the adaptive average pool for each channel group.

    Raises:
        ValueError: If ``sizes`` is empty or does not evenly divide ``in_channel``.
    """

    def __init__(self, in_channel, sizes=(1, 2, 3, 6)):
        super().__init__()
        n_groups = len(sizes)
        if n_groups == 0 or in_channel % n_groups != 0:
            raise ValueError(
                f"in_channel ({in_channel}) must be divisible by the number of pool sizes ({n_groups})"
            )
        self.stages = nn.ModuleList([nn.AdaptiveAvgPool2d((s, s)) for s in sizes])
        self.relu = nn.ReLU()
        self.channel_splits = [in_channel // n_groups] * n_groups

    def forward(self, feats):
        """Return the low-pass filtered features with the same shape as ``feats``."""
        h, w = feats.size(2), feats.size(3)
        groups = torch.split(feats, self.channel_splits, dim=1)
        priors = [
            F.interpolate(stage(group), size=(h, w), mode="bilinear", align_corners=False)
            for stage, group in zip(self.stages, groups)
        ]
        return self.relu(torch.cat(priors, 1))

class AE(nn.Module):
    """Enhancement block applied to one pyramid level.

    Combines a high-frequency branch (residual features plus Sobel edges) with
    a low-frequency branch (pyramid-pooled features) through a learned
    per-pixel gate.

    Args:
        n_feat: Number of input/output channels. Previously accepted but
            ignored (the layers were hard-coded to 3 channels); the default
            of 3 reproduces the old layer shapes exactly.
        bias: Whether the edge 1x1 convolution uses a bias term.
    """

    def __init__(self, n_feat=3, bias=False):
        super().__init__()
        self.edge_conv = nn.Conv2d(n_feat, n_feat, 1, bias=bias)
        self.res1 = ResidualBlock(n_feat, 32)
        self.dpm = DPM(32, 32)
        self.res2 = ResidualBlock(32, n_feat)
        self.low_conv1 = nn.Conv2d(n_feat, 32, 1)
        self.low_conv2 = nn.Conv2d(32, n_feat, 1)
        self.low_pass = LowPassModule(32)

        # Dynamic gate: one blending weight per pixel, predicted from both branches.
        self.gate = nn.Conv2d(2 * n_feat, 1, 1)
        self.sigmoid = nn.Sigmoid()

        # Final 1x1 fusion of the gated branches back to n_feat channels.
        self.fuse = nn.Conv2d(2 * n_feat, n_feat, 1)

    def forward(self, x):
        """Return the enhanced level with the same shape as ``x``."""
        edge = self.edge_conv(sobel(x))
        res = self.res2(self.dpm(self.res1(x)))
        low = self.low_conv2(self.low_pass(self.low_conv1(x)))

        hf = res + edge
        lf = low

        # alpha weights the high-frequency branch, (1 - alpha) the low-frequency one.
        alpha = self.sigmoid(self.gate(torch.cat([hf, lf], dim=1)))
        hf = alpha * hf
        lf = (1.0 - alpha) * lf

        return self.fuse(torch.cat([hf, lf], dim=1))

# -----------------------
# PENet main
# -----------------------

class PENet(nn.Module):
    """Pyramid Enhancement Network: enhance each Laplacian pyramid level, then rebuild.

    Args:
        c1: Number of image channels.
        num_high: Number of high-frequency pyramid levels.
        gauss_kernel: Side length of the Gaussian kernel used by the pyramid.
    """

    def __init__(self, c1=3, num_high=3, gauss_kernel=5):
        super().__init__()
        self.num_high = num_high
        self.lap_pyramid = Lap_Pyramid_Conv(num_high, gauss_kernel, channels=c1)
        # One enhancement block per level, registered as AE_0 .. AE_{num_high}
        # so the parameter names in the state dict stay "AE_<i>.*".
        for i in range(self.num_high + 1):
            setattr(self, f"AE_{i}", AE(c1))
        self.out_conv = nn.Conv2d(c1, c1, 1)

    def forward(self, x):
        """Return the enhanced image, clamped to [0, 1], with the same shape as ``x``."""
        # No local autocast block: a nested autocast(enabled=False) would turn
        # mixed precision off for this module whenever the input is float32,
        # overriding the context the training loop has set up.
        pyrs = self.lap_pyramid.pyramid_decom(x)
        # pyramid_decom returns finest-first; AE_0 handles the coarsest level
        # and pyramid_recons expects coarsest-first, hence the reversal.
        enhanced = [getattr(self, f"AE_{i}")(level) for i, level in enumerate(reversed(pyrs))]
        out = self.lap_pyramid.pyramid_recons(enhanced)
        return torch.clamp(self.out_conv(out), 0.0, 1.0)

# -----------------------
# YOLO compatibility wrapper
# -----------------------

class PENetWrapper(PENet):
    """PENet with default values for the layer metadata attributes a YOLO graph uses.

    Sets:
        - ``.f``: "from" index, -1 meaning the previous layer
        - ``.i``: layer index
        - ``.type``: module type name

    These are only defaults; presumably the model builder overwrites them when
    it constructs the layer itself (verify against the installed ultralytics
    version), in which case plain ``PENet`` is sufficient.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.f = -1  # takes input from the previous layer by default
        self.i = 0
        self.type = "PENet"

# expose

__all__ = ["PENet", "PENetWrapper"]

Please help me with the model setup and training strategy. I am using the ExDark dataset.

UltralyticsAssistant · 2026-02-17T20:55:05Z

UltralyticsAssistant
Feb 17, 2026
Maintainer

👋 Hello @Mahanthvadlamoodi, thank you for your interest in Ultralytics 🚀! We recommend a visit to the Docs for new users where you can find many Python and CLI usage examples and where many of the most common questions may already be answered.

This is an automated response 🤖—an Ultralytics engineer will also assist soon 🛠️.

If this is a 🐛 Bug Report, please provide a minimum reproducible example to help us debug it. In your case (custom module + model parsing/training), an MRE is especially important—please include:
- Your exact ultralytics version and install method
- A minimal model config or code snippet showing how you’re attaching PENetWrapper (YAML or Python)
- A single command you run to reproduce the issue (CLI or Python)
- The full error traceback/log output
- A small dummy dataset sample (or a synthetic input test) that triggers the same failure

If this is a custom training ❓ Question, please provide as much information as possible, including dataset image examples and training logs, and verify you are following our Tips for Best Training Results. Also share:
- Your dataset YAML (paths, class names, train/val split)
- Image sizes, augmentations, and whether you’re training from scratch or starting from pretrained weights
- Any changes you made to the model definition to insert the enhancement block “pre-module”

Join the Ultralytics community where it suits you best. For real-time chat, head to Discord 🎧. Prefer in-depth discussions? Check out Discourse. Or dive into threads on our Subreddit to share knowledge with the community.

Upgrade

Upgrade to the latest ultralytics package including all requirements in a Python>=3.8 environment with PyTorch>=1.8 to verify your issue is not already resolved in the latest version:

pip install -U ultralytics

Environments

YOLO may be run in any of the following up-to-date verified environments (with all dependencies including CUDA/CUDNN, Python and PyTorch preinstalled):

Notebooks with free GPU:
Google Cloud Deep Learning VM. See GCP Quickstart Guide
Amazon Deep Learning AMI. See AWS Quickstart Guide
Docker Image. See Docker Quickstart Guide

Status

If this badge is green, all Ultralytics CI tests are currently passing. CI tests verify correct operation of all YOLO Modes and Tasks on macOS, Windows, and Ubuntu every 24 hours and on every commit.

1 reply

glenn-jocher Feb 17, 2026
Maintainer

You can train this end-to-end with standard Ultralytics YOLO detection loss (no extra loss needed), but don’t insert PENet as an extra YAML layer because it will shift YOLOv8n’s absolute from indices; instead replace the first stem layer with a wrapper that runs PENet then the original stem Conv (same args as the YAML you’re starting from). Also, you don’t need .f/.i/.type or an internal autocast() block—Ultralytics already attaches graph metadata and manages AMP globally. The clean path is the same as in the Model YAML Configuration Guide: define your module in ultralytics/nn/modules/block.py, export it in ultralytics/nn/modules/__init__.py, and import it in ultralytics/nn/tasks.py so the YAML parser can resolve it.

# ultralytics/nn/modules/block.py
import torch
import torch.nn as nn
from ultralytics.nn.modules.conv import Conv  # Ultralytics Conv

class PENetStem(nn.Module):
    """Drop-in stem layer: PENet image enhancement followed by an Ultralytics Conv.

    Args:
        c1: Input channels, also used as PENet's channel count.
        c2: Output channels of the Conv.
        k: Conv kernel size.
        s: Conv stride.
        num_high: Number of high-frequency pyramid levels passed to PENet.
        gauss_kernel: Gaussian kernel size passed to PENet.
    """

    def __init__(self, c1: int, c2: int, k: int = 3, s: int = 2, num_high: int = 3, gauss_kernel: int = 5):
        super().__init__()
        # Attribute names are kept as "enhance" and "stem" so checkpoint keys do not change.
        enhancer = PENet(c1=c1, num_high=num_high, gauss_kernel=gauss_kernel)
        self.enhance = enhancer
        conv_layer = Conv(c1, c2, k=k, s=s)
        self.stem = conv_layer

    def forward(self, x):
        """Enhance ``x`` with PENet, then apply the stem convolution."""
        return self.stem(self.enhance(x))

# in your yolov8n.yaml, replace the first stem layer (don’t add a new one)
backbone:
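  - [-1, 1, PENetStem, [16, 3, 2, 3, 5]]  # [c2, k, s, num_high, gauss_kernel] (keep c2/k/s consistent with the original layer)
  # NOTE: the parser only prepends c1 (and applies width scaling to c2) for module classes that
  # parse_model treats as channel-aware; register PENetStem there as well, otherwise these args are
  # passed positionally as (c1=16, c2=3, k=2, ...). Verify against your installed ultralytics version.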

If you share the exact error you’re hitting (full text traceback) plus yolo checks, I can tell you whether it’s a module-resolution issue (KeyError: 'PENetStem' → missing import) or a channel/index mismatch from the YAML edit.

Mahanthvadlamoodi · 2026-02-17T20:59:30Z

Mahanthvadlamoodi
Feb 17, 2026
Author

@glenn-jocher can you please help me here?
And is yolov8n, with 3M params, really stable enough to handle that much random weight initialization in PENet, which will be the pre-module for YOLO? Or should I check PENet's capability with yolov8m or the l series, since they have a much stronger backbone compared to v8s?

1 reply

glenn-jocher Feb 18, 2026
Maintainer

You don’t need the PENetWrapper (.f/.i/.type are set by Ultralytics), you just need PENet to be resolvable from the YAML parser (add it under ultralytics/nn/modules/ and import it into ultralytics/nn/tasks.py), as described in the Model YAML configuration guide, then prepend it to the yolov8n backbone like:

# yolov8n_penet.yaml
nc: 80

backbone:
  - [-1, 1, PENet, [3, 3, 5]]  # c1=3, num_high=3, gauss_kernel=5
  - [-1, 1, Conv, [64, 3, 2]]
  # ... keep the rest of yolov8n.yaml unchanged
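head:
  # ... same layers, but add 1 to every absolute (non-negative) `from` index used by Concat/Detect,
  # because the prepended PENet layer shifts the number of every later layer by one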

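yolov8n is generally “stable”; the real risk is a randomly initialized pre-module heavily perturbing inputs early in training, so I’d make PENet start near-identity (and avoid hard clamp() saturation) so the detector initially sees something close to the original image. With a residual output (forward returning x + self.out_conv(out); without that skip connection a zero-initialized out_conv would output an all-zero image instead), that is just: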

import torch.nn as nn

# inside PENet.__init__
nn.init.zeros_(self.out_conv.weight)
nn.init.zeros_(self.out_conv.bias)

If you still see underfitting after PENet is behaving, then scale up to yolov8s/m for capacity—but I’d debug the integration/training dynamics on yolov8n first since it’s faster to iterate.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Ultralytics

How to configure and train a yolov8n model with a pyramid enhancement block (Pre module to the yolov8) #23681

Uh oh!

{{title}}

Uh oh!

Replies: 2 comments 2 replies

Uh oh!

{{title}}

Uh oh!

Uh oh!

{{title}}

Uh oh!

Uh oh!

{{title}}

Uh oh!

Uh oh!

{{title}}

Uh oh!

Select a reply

Uh oh!

Ultralytics

How to configure and train a yolov8n model with a pyramid enhancement block (Pre module to the yolov8) #23681

Uh oh!

Mahanthvadlamoodi Feb 17, 2026

-----------------------

Laplacian Pyramid utils

-----------------------

-----------------------

Small enhancement blocks

-----------------------

-----------------------

Sobel filter (AMP-safe)

-----------------------

-----------------------

PENet main

-----------------------

-----------------------

YOLO compatibility wrapper

-----------------------

expose

Replies: 2 comments · 2 replies

Uh oh!

UltralyticsAssistant Feb 17, 2026 Maintainer

Upgrade

Environments

Status

Uh oh!

glenn-jocher Feb 17, 2026 Maintainer

Uh oh!

Mahanthvadlamoodi Feb 17, 2026 Author

Uh oh!

glenn-jocher Feb 18, 2026 Maintainer

Mahanthvadlamoodi
Feb 17, 2026

Replies: 2 comments 2 replies

UltralyticsAssistant
Feb 17, 2026
Maintainer

glenn-jocher Feb 17, 2026
Maintainer

Mahanthvadlamoodi
Feb 17, 2026
Author

glenn-jocher Feb 18, 2026
Maintainer