ultralytics · oddzcv · Feb 15, 2026 · Feb 15, 2026 · Feb 15, 2026 · Feb 15, 2026
diff --git a/YOLO_+_Attention_Module.ipynb b/YOLO_+_Attention_Module.ipynb
diff --git a/detect.py b/detect.py
@@ -72,8 +72,8 @@ def run(
     source=ROOT / "data/images",  # file/dir/URL/glob/screen/0(webcam)
     data=ROOT / "data/coco128.yaml",  # dataset.yaml path
     imgsz=(640, 640),  # inference size (height, width)
-    conf_thres=0.25,  # confidence threshold
-    iou_thres=0.45,  # NMS IOU threshold
+    conf_thres=0.3,  # confidence threshold
+    iou_thres=0.1,  # NMS IOU threshold
     max_det=1000,  # maximum detections per image
     device="",  # cuda device, i.e. 0 or 0,1,2,3 or cpu
     view_img=False,  # show results

diff --git a/models/common.py b/models/common.py
@@ -1109,3 +1109,137 @@ def forward(self, x):
         if isinstance(x, list):
             x = torch.cat(x, 1)
         return self.linear(self.drop(self.pool(self.conv(x)).flatten(1)))
+
+
+class SE(nn.Module):
+    """Squeeze-and-Excitation (SE) block that rescales every channel of its input by a learned gate.
+
+    The constructor follows the (c1, c2, ...) layer pattern so the block can be listed directly in a model YAML:
+    parse_model computes `c1, c2` for this layer and appears to pass both positionally (verify against parse_model).
+    Without the `c2` slot the output channel count would be bound to `reduction`, collapsing the bottleneck to a
+    single unit.
+
+    Args:
+        channels (int): Number of input channels; the output keeps the same number.
+        c2 (int | None): Output channel count. Must equal `channels` when given.
+        reduction (int): Channel reduction ratio of the bottleneck MLP.
+
+    Raises:
+        ValueError: If `c2` is given and differs from `channels`.
+    """
+
+    def __init__(self, channels: int, c2=None, reduction: int = 16):
+        """Create the global average pool and the two-layer gating MLP."""
+        super().__init__()
+        if c2 is not None and c2 != channels:
+            raise ValueError(f"SE layer expects c1 == c2 (no channel change), got c1={channels}, c2={c2}.")
+        mid = max(1, channels // reduction)  # never let the bottleneck shrink to zero units
+        self.avg_pool = nn.AdaptiveAvgPool2d(1)
+        self.fc = nn.Sequential(
+            nn.Linear(channels, mid, bias=False),
+            nn.ReLU(inplace=True),
+            nn.Linear(mid, channels, bias=False),
+            nn.Sigmoid(),
+        )
+
+    def forward(self, x):
+        """Scale `x` channel-wise by gates computed from its globally average-pooled descriptor."""
+        b, c, _, _ = x.size()
+        y = self.avg_pool(x).view(b, c)  # squeeze: (b, c, 1, 1) -> (b, c)
+        y = self.fc(y).view(b, c, 1, 1)  # excite: one sigmoid gate per channel
+        return x * y
+
+
+class SEBottleneck(nn.Module):
+    """Bottleneck with an SE block applied to the convolution branch before the optional residual add.
+
+    Safe to use inside C3SE because c1 == c2 there (both are the hidden channel count). The signature follows
+    Bottleneck: (c1, c2, shortcut, g, e), with `se_reduction` added for the SE block.
+    """
+
+    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, se_reduction=16):
+        """Build the two convolutions and the SE block; the hidden width is int(c2 * e)."""
+        super().__init__()
+        hidden = int(c2 * e)
+        self.add = shortcut and c1 == c2  # residual only when input and output channel counts match
+        self.cv1 = Conv(c1, hidden, 1, 1)
+        self.cv2 = Conv(hidden, c2, 3, 1, g=g)
+        self.se = SE(c2, reduction=se_reduction)
+
+    def forward(self, x):
+        """Return the SE-gated convolution features, plus `x` when the shortcut is active."""
+        out = self.se(self.cv2(self.cv1(x)))
+        if self.add:
+            return x + out
+        return out
+
+
+class C3SE(C3):
+    """C3 module whose inner block stack is built from SEBottleneck (drop-in replacement for C3).
+
+    NOTE(review): confirm parse_model forwards the repeat count `n` to C3SE the same way it does for C3.
+    """
+
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, se_reduction=16):
+        """Initialize C3, then replace `self.m` with `n` SEBottleneck blocks of width int(c2 * e)."""
+        super().__init__(c1, c2, n, shortcut, g, e)
+        hidden = int(c2 * e)
+        blocks = [SEBottleneck(hidden, hidden, shortcut, g, e=1.0, se_reduction=se_reduction) for _ in range(n)]
+        self.m = nn.Sequential(*blocks)
+
+
+class ChannelAttention(nn.Module):
+    """CBAM channel attention: per-channel gates from average- and max-pooled descriptors.
+
+    Both pooled descriptors pass through the same f1 -> ReLU -> f2 bottleneck; their sum goes through a sigmoid.
+    """
+
+    def __init__(self, in_planes: int, ratio: int = 16):
+        """Create the pooling layers and the shared bottleneck with max(1, in_planes // ratio) hidden channels."""
+        super().__init__()
+        hidden = max(1, in_planes // ratio)
+        self.avg_pool = nn.AdaptiveAvgPool2d(1)
+        self.max_pool = nn.AdaptiveMaxPool2d(1)
+        self.f1 = nn.Conv2d(in_planes, hidden, 1, bias=False)
+        self.relu = nn.ReLU(inplace=True)
+        self.f2 = nn.Conv2d(hidden, in_planes, 1, bias=False)
+        self.sigmoid = nn.Sigmoid()
+
+    def forward(self, x):
+        """Return the attention map for `x`; the map is not multiplied with `x` here."""
+        branches = []
+        for pool in (self.avg_pool, self.max_pool):
+            branches.append(self.f2(self.relu(self.f1(pool(x)))))
+        return self.sigmoid(branches[0] + branches[1])
+
+
+class SpatialAttention(nn.Module):
+    """CBAM spatial attention: a single-channel map built from channel-wise mean and max statistics."""
+
+    def __init__(self, kernel_size: int = 7):
+        """Create the 2 -> 1 channel convolution; only kernel sizes 3 and 7 are accepted."""
+        super().__init__()
+        assert kernel_size in (3, 7), "kernel_size must be 3 or 7"
+        pad = 1 if kernel_size != 7 else 3  # padding that keeps the spatial size for the supported kernels
+        self.conv = nn.Conv2d(2, 1, kernel_size, padding=pad, bias=False)
+        self.sigmoid = nn.Sigmoid()
+
+    def forward(self, x):
+        """Return the sigmoid attention map computed from the mean and max of `x` over dim 1."""
+        mean_map = torch.mean(x, dim=1, keepdim=True)
+        max_map = torch.max(x, dim=1, keepdim=True).values
+        stats = torch.cat([mean_map, max_map], dim=1)
+        return self.sigmoid(self.conv(stats))
+
+
+class CBAM(nn.Module):
+    """Standalone CBAM layer (can be inserted in a model YAML): channel attention, then spatial attention.
+
+    The signature is kept compatible with the YOLOv5 (c1, c2, ...) layer pattern. The layer does not change the
+    channel count, so it is generally used with c1 == c2.
+    """
+
+    def __init__(self, c1, c2=None, ratio=16, kernel_size=7):
+        """Build both attention stages for `c1` channels; `c2` defaults to `c1` and must match it."""
+        super().__init__()
+        if c2 is None:
+            c2 = c1
+        assert c1 == c2, "CBAM layer expects c1 == c2 (no channel change)."
+        self.ca = ChannelAttention(c1, ratio=ratio)
+        self.sa = SpatialAttention(kernel_size=kernel_size)
+
+    def forward(self, x):
+        """Apply the channel gate, then the spatial gate computed from the channel-refined features."""
+        channel_refined = self.ca(x) * x
+        return self.sa(channel_refined) * channel_refined
+
+
+class CBAMBottleneck(nn.Module):
+    """Bottleneck whose convolution branch is refined by CBAM attention (used inside C3CBAM)."""
+
+    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, ratio=16, kernel_size=7):
+        """Build the two convolutions and both attention stages; the hidden width is int(c2 * e)."""
+        super().__init__()
+        hidden = int(c2 * e)
+        self.add = shortcut and c1 == c2  # residual only when input and output channel counts match
+        self.cv1 = Conv(c1, hidden, 1, 1)
+        self.cv2 = Conv(hidden, c2, 3, 1, g=g)
+        self.ca = ChannelAttention(c2, ratio=ratio)
+        self.sa = SpatialAttention(kernel_size=kernel_size)
+
+    def forward(self, x):
+        """Return the attention-refined convolution features, plus `x` when the shortcut is active."""
+        out = self.cv2(self.cv1(x))
+        out = self.ca(out) * out  # channel gate first
+        out = self.sa(out) * out  # spatial gate on the channel-refined features
+        if self.add:
+            return x + out
+        return out
+
+
+class C3CBAM(C3):
+    """C3 module whose inner block stack is built from CBAMBottleneck (drop-in replacement for C3).
+
+    NOTE(review): confirm parse_model forwards the repeat count `n` to C3CBAM the same way it does for C3.
+    """
+
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, ratio=16, kernel_size=7):
+        """Initialize C3, then replace `self.m` with `n` CBAMBottleneck blocks of width int(c2 * e)."""
+        super().__init__(c1, c2, n, shortcut, g, e)
+        hidden = int(c2 * e)
+        blocks = [
+            CBAMBottleneck(hidden, hidden, shortcut, g, e=1.0, ratio=ratio, kernel_size=kernel_size) for _ in range(n)
+        ]
+        self.m = nn.Sequential(*blocks)
diff --git a/models/yolo.py b/models/yolo.py
@@ -27,14 +27,19 @@
 
 from models.common import (
     C3,
+    C3CBAM,
+    C3SE,
     C3SPP,
     C3TR,
+    CBAM,
+    SE,
     SPP,
     SPPF,
     Bottleneck,
     BottleneckCSP,
     C3Ghost,
     C3x,
+    CBAMBottleneck,
     Classify,
     Concat,
     Contract,
@@ -48,6 +53,7 @@
     GhostBottleneck,
     GhostConv,
     Proto,
+    SEBottleneck,
 )
 from models.experimental import MixConv2d
 from utils.autoanchor import check_anchor_order
@@ -421,6 +427,12 @@ def parse_model(d, ch):
             nn.ConvTranspose2d,
             DWConvTranspose2d,
             C3x,
+            SE,
+            SEBottleneck,
+            C3SE,
+            CBAM,
+            CBAMBottleneck,
+            C3CBAM,
         }:
             c1, c2 = ch[f], args[0]
             if c2 != no:  # if not output

diff --git a/models/yolov5s-c3se-backbone.yaml b/models/yolov5s-c3se-backbone.yaml
@@ -0,0 +1,41 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+# Parameters
+nc: 1 # number of classes
+depth_multiple: 0.33 # model depth multiple
+width_multiple: 0.50 # layer channel multiple
+anchors:
+ - [10,13, 16,30, 33,23] # P3/8
+ - [30,61, 62,45, 59,119] # P4/16
+ - [116,90, 156,198, 373,326] # P5/32
+# YOLOv5 v6.0 backbone
+backbone:
+ # [from, number, module, args]
+  [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+   [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+   [-1, 3, C3SE, [128]],
+   [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+   [-1, 6, C3SE, [256]],
+   [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+   [-1, 9, C3SE, [512]],
+   [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+   [-1, 3, C3SE, [1024]],
+   [-1, 1, SPPF, [1024, 5]], # 9
+  ]
+# YOLOv5 v6.0 head
+head:
+  [[-1, 1, Conv, [512, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]], # cat backbone P4
+   [-1, 3, C3, [512, False]], # 13
+   [-1, 1, Conv, [256, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]], # cat backbone P3
+   [-1, 3, C3, [256, False]], # 17 (P3/8-small)
+   [-1, 1, Conv, [256, 3, 2]],
+   [[-1, 14], 1, Concat, [1]], # cat head P4
+   [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
+   [-1, 1, Conv, [512, 3, 2]],
+   [[-1, 10], 1, Concat, [1]], # cat head P5
+   [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
+   [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
+  ]
diff --git a/models/yolov5s-c3se.yaml b/models/yolov5s-c3se.yaml
@@ -0,0 +1,41 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+# Parameters
+nc: 1 # number of classes
+depth_multiple: 0.33 # model depth multiple
+width_multiple: 0.50 # layer channel multiple
+anchors:
+ - [10,13, 16,30, 33,23] # P3/8
+ - [30,61, 62,45, 59,119] # P4/16
+ - [116,90, 156,198, 373,326] # P5/32
+# YOLOv5 v6.0 backbone
+backbone:
+ # [from, number, module, args]
+  [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+   [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+   [-1, 3, C3, [128]],
+   [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+   [-1, 6, C3, [256]],
+   [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+   [-1, 9, C3, [512]],
+   [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+   [-1, 3, C3SE, [1024]],
+   [-1, 1, SPPF, [1024, 5]], # 9
+  ]
+# YOLOv5 v6.0 head
+head:
+  [[-1, 1, Conv, [512, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]], # cat backbone P4
+   [-1, 3, C3, [512, False]], # 13
+   [-1, 1, Conv, [256, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]], # cat backbone P3
+   [-1, 3, C3, [256, False]], # 17 (P3/8-small)
+   [-1, 1, Conv, [256, 3, 2]],
+   [[-1, 14], 1, Concat, [1]], # cat head P4
+   [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
+   [-1, 1, Conv, [512, 3, 2]],
+   [[-1, 10], 1, Concat, [1]], # cat head P5
+   [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
+   [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
+  ]
diff --git a/models/yolov5s-cbam.yaml b/models/yolov5s-cbam.yaml
@@ -0,0 +1,42 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+# Parameters
+nc: 1 # number of classes
+depth_multiple: 0.33 # model depth multiple
+width_multiple: 0.50 # layer channel multiple
+anchors:
+ - [10,13, 16,30, 33,23] # P3/8
+ - [30,61, 62,45, 59,119] # P4/16
+ - [116,90, 156,198, 373,326] # P5/32
+# YOLOv5 v6.0 backbone
+backbone:
+ # [from, number, module, args]
+  [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+   [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+   [-1, 3, C3, [128]],
+   [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+   [-1, 6, C3, [256]],
+   [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+   [-1, 9, C3, [512]],
+   [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+   [-1, 3, C3, [1024]],
+   [-1, 1, CBAM, [1024]],
+   [-1, 1, SPPF, [1024, 5]], # 9+1
+  ]
+# YOLOv5 v6.0 head
+head:
+  [[-1, 1, Conv, [512, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]], # cat backbone P4
+   [-1, 3, C3, [512, False]], # 13+1
+   [-1, 1, Conv, [256, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]], # cat backbone P3
+   [-1, 3, C3, [256, False]], # 17+1 (P3/8-small)
+   [-1, 1, Conv, [256, 3, 2]],
+   [[-1, 15], 1, Concat, [1]], # cat head P4
+   [-1, 3, C3, [512, False]], # 20+1 (P4/16-medium)
+   [-1, 1, Conv, [512, 3, 2]],
+   [[-1, 11], 1, Concat, [1]], # cat head P5
+   [-1, 3, C3, [1024, False]], # 23+1 (P5/32-large)
+   [[18, 21, 24], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
+  ]
diff --git a/models/yolov5s-se.yaml b/models/yolov5s-se.yaml
@@ -0,0 +1,42 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+# Parameters
+nc: 1 # number of classes
+depth_multiple: 0.33 # model depth multiple
+width_multiple: 0.50 # layer channel multiple
+anchors:
+ - [10,13, 16,30, 33,23] # P3/8
+ - [30,61, 62,45, 59,119] # P4/16
+ - [116,90, 156,198, 373,326] # P5/32
+# YOLOv5 v6.0 backbone
+backbone:
+ # [from, number, module, args]
+  [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+   [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+   [-1, 3, C3, [128]],
+   [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+   [-1, 6, C3, [256]],
+   [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+   [-1, 9, C3, [512]],
+   [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+   [-1, 3, C3, [1024]],
+   [-1, 1, SE, [1024]],
+   [-1, 1, SPPF, [1024, 5]], # 9+1
+  ]
+# YOLOv5 v6.0 head
+head:
+  [[-1, 1, Conv, [512, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]], # cat backbone P4
+   [-1, 3, C3, [512, False]], # 13+1
+   [-1, 1, Conv, [256, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]], # cat backbone P3
+   [-1, 3, C3, [256, False]], # 17+1 (P3/8-small)
+   [-1, 1, Conv, [256, 3, 2]],
+   [[-1, 15], 1, Concat, [1]], # cat head P4
+   [-1, 3, C3, [512, False]], # 20+1 (P4/16-medium)
+   [-1, 1, Conv, [512, 3, 2]],
+   [[-1, 11], 1, Concat, [1]], # cat head P5
+   [-1, 3, C3, [1024, False]], # 23+1 (P5/32-large)
+   [[18, 21, 24], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
+  ]
diff --git a/utils/metrics.py b/utils/metrics.py
@@ -197,7 +197,7 @@ def tp_fp(self):
         return tp[:-1], fp[:-1]  # remove background class
 
     @TryExcept("WARNING ⚠️ ConfusionMatrix plot failure")
-    def plot(self, normalize=True, save_dir="", names=()):
+    def plot(self, normalize=False, save_dir="", names=()):
         """Plots confusion matrix using seaborn, optional normalization; can save plot to specified directory."""
         import seaborn as sn
 

diff --git a/utils/plots.py b/utils/plots.py
@@ -208,7 +208,7 @@ def plot_images(images, targets, paths=None, fname="images.jpg", names=None):
                 color = colors(cls)
                 cls = names[cls] if names else cls
                 if labels or conf[j] > 0.25:  # 0.25 conf thresh
-                    label = f"{cls}" if labels else f"{cls} {conf[j]:.1f}"
+                    label = f"{cls}" if labels else f"{cls} {conf[j]:.2f}"
                     annotator.box_label(box, label, color=color)
     annotator.im.save(fname)  # save