Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6,346 changes: 6,346 additions & 0 deletions YOLO_+_Attention_Module.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions detect.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ def run(
source=ROOT / "data/images", # file/dir/URL/glob/screen/0(webcam)
data=ROOT / "data/coco128.yaml", # dataset.yaml path
imgsz=(640, 640), # inference size (height, width)
conf_thres=0.25, # confidence threshold
iou_thres=0.45, # NMS IOU threshold
conf_thres=0.3, # confidence threshold
iou_thres=0.1, # NMS IOU threshold
max_det=1000, # maximum detections per image
device="", # cuda device, i.e. 0 or 0,1,2,3 or cpu
view_img=False, # show results
Expand Down
134 changes: 134 additions & 0 deletions models/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1109,3 +1109,137 @@ def forward(self, x):
if isinstance(x, list):
x = torch.cat(x, 1)
return self.linear(self.drop(self.pool(self.conv(x)).flatten(1)))


class SE(nn.Module):
"""Squeeze-and-Excitation (SE) block."""

def __init__(self, channels: int, reduction: int = 16):
super().__init__()
mid = max(1, channels // reduction)
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.fc = nn.Sequential(
nn.Linear(channels, mid, bias=False),
nn.ReLU(inplace=True),
nn.Linear(mid, channels, bias=False),
nn.Sigmoid(),
)

def forward(self, x):
b, c, _, _ = x.size()
y = self.avg_pool(x).view(b, c)
y = self.fc(y).view(b, c, 1, 1)
return x * y


class SEBottleneck(nn.Module):
    """Bottleneck followed by an SE block.

    Safe to use inside C3SE because there c1 == c2 (the hidden channels) in the usual case.
    The signature follows Bottleneck: (c1, c2, shortcut, g, e), plus ``se_reduction`` which is
    forwarded to SE as its ``reduction``.

    The residual connection is applied only when ``shortcut`` is truthy and ``c1 == c2``.
    """

    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, se_reduction=16):
        super().__init__()
        hidden = int(c2 * e)  # intermediate width between the two convolutions
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(hidden, c2, 3, 1, g=g)
        self.add = shortcut and c1 == c2
        self.se = SE(c2, reduction=se_reduction)

    def forward(self, x):
        """Apply cv1 -> cv2 -> SE and add the input back when the shortcut is enabled."""
        out = self.se(self.cv2(self.cv1(x)))
        if self.add:
            return x + out
        return out


class C3SE(C3):
    """C3 variant whose inner stack is made of SEBottleneck modules (drop-in replacement for C3).

    Takes the same arguments as C3 plus ``se_reduction``, which is passed to every SEBottleneck.
    After the parent constructor runs, ``self.m`` is overwritten with ``n`` SEBottleneck blocks
    operating on ``int(c2 * e)`` channels (presumably replacing the stack C3 built -- C3 itself is
    defined outside this view).
    """

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, se_reduction=16):
        super().__init__(c1, c2, n, shortcut, g, e)
        hidden = int(c2 * e)
        blocks = [
            SEBottleneck(hidden, hidden, shortcut, g, e=1.0, se_reduction=se_reduction)
            for _ in range(n)
        ]
        self.m = nn.Sequential(*blocks)


class ChannelAttention(nn.Module):
    """CBAM channel attention.

    Global average- and max-pooled descriptors are each passed through the same two 1x1
    convolutions (``f1`` -> ReLU -> ``f2``); the two results are summed and squashed with a
    sigmoid, giving one gate per channel with shape (b, c, 1, 1).

    Args:
        in_planes: Number of input channels.
        ratio: Bottleneck divisor; the hidden width is ``max(1, in_planes // ratio)``.
    """

    def __init__(self, in_planes: int, ratio: int = 16):
        super().__init__()
        # Keep at least one hidden channel when ratio exceeds in_planes.
        hidden = in_planes // ratio
        if hidden < 1:
            hidden = 1
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        self.f1 = nn.Conv2d(in_planes, hidden, 1, bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.f2 = nn.Conv2d(hidden, in_planes, 1, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the per-channel attention map for ``x`` (not the re-weighted features)."""

        def shared_mlp(descriptor):
            # Both pooled descriptors go through the same weights.
            return self.f2(self.relu(self.f1(descriptor)))

        from_avg = shared_mlp(self.avg_pool(x))
        from_max = shared_mlp(self.max_pool(x))
        return self.sigmoid(from_avg + from_max)


class SpatialAttention(nn.Module):
    """CBAM spatial attention.

    The input is reduced along the channel axis with a mean and a max, the two single-channel
    maps are concatenated and convolved down to one channel, and a sigmoid turns the result
    into a (b, 1, h, w) attention map.

    Args:
        kernel_size: Side length of the square convolution kernel. Any positive odd value is
            accepted; "same" padding of ``kernel_size // 2`` keeps the spatial size unchanged
            (this yields padding 1 for 3 and padding 3 for 7).

    Raises:
        ValueError: If ``kernel_size`` is not a positive odd integer. An explicit exception is
            used instead of ``assert`` so the check still runs under ``python -O``; an unchecked
            kernel size would change the map's spatial size and break the later multiplication
            with the feature map.
    """

    def __init__(self, kernel_size: int = 7):
        super().__init__()
        if kernel_size < 1 or kernel_size % 2 == 0:
            raise ValueError(f"kernel_size must be a positive odd integer, got {kernel_size}")
        # Odd kernel + padding k//2 + stride 1 => output height/width equal the input's.
        self.conv = nn.Conv2d(2, 1, kernel_size, padding=kernel_size // 2, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the spatial attention map for ``x`` (not the re-weighted features)."""
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)  # indices are not needed
        pooled = torch.cat([avg_out, max_out], dim=1)  # (b, 2, h, w)
        return self.sigmoid(self.conv(pooled))


class CBAM(nn.Module):
    """CBAM block as a standalone layer (can be inserted in a model YAML).

    The signature is made compatible with the YOLOv5 pattern (c1, c2, ...). It is generally used
    with c1 == c2, and the constructor asserts that: the block never changes the channel count.
    Channel attention is applied first, then spatial attention on the channel-refined features.

    Args:
        c1: Number of input channels.
        c2: Number of output channels; defaults to ``c1`` when ``None`` and must equal ``c1``.
        ratio: Forwarded to ChannelAttention as its ``ratio``.
        kernel_size: Forwarded to SpatialAttention as its ``kernel_size``.
    """

    def __init__(self, c1, c2=None, ratio=16, kernel_size=7):
        super().__init__()
        if c2 is None:
            c2 = c1
        assert c1 == c2, "CBAM layer expects c1 == c2 (no channel change)."
        self.ca = ChannelAttention(c1, ratio=ratio)
        self.sa = SpatialAttention(kernel_size=kernel_size)

    def forward(self, x):
        """Re-weight ``x`` by its channel attention map, then by its spatial attention map."""
        channel_refined = self.ca(x) * x
        return self.sa(channel_refined) * channel_refined


class CBAMBottleneck(nn.Module):
    """Bottleneck followed by CBAM attention (used inside C3CBAM).

    Two Conv layers produce the features, which are then re-weighted by channel attention and
    spatial attention in that order. The input is added back only when ``shortcut`` is truthy
    and ``c1 == c2``.

    Args:
        c1: Input channels.
        c2: Output channels.
        shortcut: Whether to request the residual connection.
        g: Passed as ``g`` to the second Conv.
        e: Expansion factor; the intermediate width is ``int(c2 * e)``.
        ratio: Forwarded to ChannelAttention.
        kernel_size: Forwarded to SpatialAttention.
    """

    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, ratio=16, kernel_size=7):
        super().__init__()
        hidden = int(c2 * e)  # intermediate width between the two convolutions
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(hidden, c2, 3, 1, g=g)
        self.add = shortcut and c1 == c2
        self.ca = ChannelAttention(c2, ratio=ratio)
        self.sa = SpatialAttention(kernel_size=kernel_size)

    def forward(self, x):
        """Apply cv1 -> cv2 -> channel attention -> spatial attention, plus optional shortcut."""
        feats = self.cv2(self.cv1(x))
        feats = self.ca(feats) * feats
        feats = self.sa(feats) * feats
        if self.add:
            return x + feats
        return feats


class C3CBAM(C3):
    """C3 variant whose inner stack is made of CBAMBottleneck modules (drop-in replacement for C3).

    Takes the same arguments as C3 plus ``ratio`` and ``kernel_size``, which are passed to every
    CBAMBottleneck. After the parent constructor runs, ``self.m`` is overwritten with ``n``
    CBAMBottleneck blocks operating on ``int(c2 * e)`` channels (presumably replacing the stack
    C3 built -- C3 itself is defined outside this view).
    """

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, ratio=16, kernel_size=7):
        super().__init__(c1, c2, n, shortcut, g, e)
        hidden = int(c2 * e)
        blocks = [
            CBAMBottleneck(hidden, hidden, shortcut, g, e=1.0, ratio=ratio, kernel_size=kernel_size)
            for _ in range(n)
        ]
        self.m = nn.Sequential(*blocks)
12 changes: 12 additions & 0 deletions models/yolo.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,19 @@

from models.common import (
C3,
C3CBAM,
C3SE,
C3SPP,
C3TR,
CBAM,
SE,
SPP,
SPPF,
Bottleneck,
BottleneckCSP,
C3Ghost,
C3x,
CBAMBottleneck,
Classify,
Concat,
Contract,
Expand All @@ -48,6 +53,7 @@
GhostBottleneck,
GhostConv,
Proto,
SEBottleneck,
)
from models.experimental import MixConv2d
from utils.autoanchor import check_anchor_order
Expand Down Expand Up @@ -421,6 +427,12 @@ def parse_model(d, ch):
nn.ConvTranspose2d,
DWConvTranspose2d,
C3x,
SE,
SEBottleneck,
C3SE,
CBAM,
CBAMBottleneck,
C3CBAM,
}:
c1, c2 = ch[f], args[0]
if c2 != no: # if not output
Expand Down
41 changes: 41 additions & 0 deletions models/yolov5s-c3se-backbone.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 1 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
 [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
  [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
  [-1, 3, C3SE, [128]],
  [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
  [-1, 6, C3SE, [256]],
  [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
  [-1, 9, C3SE, [512]],
  [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
  [-1, 3, C3SE, [1024]],
  [-1, 1, SPPF, [1024, 5]], # 9
 ]
# YOLOv5 v6.0 head
head:
 [[-1, 1, Conv, [512, 1, 1]],
  [-1, 1, nn.Upsample, [None, 2, 'nearest']],
  [[-1, 6], 1, Concat, [1]], # cat backbone P4
   [-1, 3, C3, [512, False]], # 13
  [-1, 1, Conv, [256, 1, 1]],
  [-1, 1, nn.Upsample, [None, 2, 'nearest']],
  [[-1, 4], 1, Concat, [1]], # cat backbone P3
   [-1, 3, C3, [256, False]], # 17 (P3/8-small)
  [-1, 1, Conv, [256, 3, 2]],
  [[-1, 14], 1, Concat, [1]], # cat head P4
   [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
  [-1, 1, Conv, [512, 3, 2]],
  [[-1, 10], 1, Concat, [1]], # cat head P5
   [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
  [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
 ]
41 changes: 41 additions & 0 deletions models/yolov5s-c3se.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 1 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
 [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
  [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
  [-1, 3, C3, [128]],
  [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
  [-1, 6, C3, [256]],
  [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
  [-1, 9, C3, [512]],
  [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
  [-1, 3, C3SE, [1024]],
  [-1, 1, SPPF, [1024, 5]], # 9
 ]
# YOLOv5 v6.0 head
head:
 [[-1, 1, Conv, [512, 1, 1]],
  [-1, 1, nn.Upsample, [None, 2, 'nearest']],
  [[-1, 6], 1, Concat, [1]], # cat backbone P4
   [-1, 3, C3, [512, False]], # 13
  [-1, 1, Conv, [256, 1, 1]],
  [-1, 1, nn.Upsample, [None, 2, 'nearest']],
  [[-1, 4], 1, Concat, [1]], # cat backbone P3
   [-1, 3, C3, [256, False]], # 17 (P3/8-small)
  [-1, 1, Conv, [256, 3, 2]],
  [[-1, 14], 1, Concat, [1]], # cat head P4
   [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
  [-1, 1, Conv, [512, 3, 2]],
  [[-1, 10], 1, Concat, [1]], # cat head P5
   [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
  [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
 ]
42 changes: 42 additions & 0 deletions models/yolov5s-cbam.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 1 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
 [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
  [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
  [-1, 3, C3, [128]],
  [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
  [-1, 6, C3, [256]],
  [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
  [-1, 9, C3, [512]],
  [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
  [-1, 3, C3, [1024]],
  [-1, 1, CBAM, [1024]],
  [-1, 1, SPPF, [1024, 5]], # 9+1
 ]
# YOLOv5 v6.0 head
head:
 [[-1, 1, Conv, [512, 1, 1]],
  [-1, 1, nn.Upsample, [None, 2, 'nearest']],
  [[-1, 6], 1, Concat, [1]], # cat backbone P4
  [-1, 3, C3, [512, False]], # 13+1
  [-1, 1, Conv, [256, 1, 1]],
  [-1, 1, nn.Upsample, [None, 2, 'nearest']],
  [[-1, 4], 1, Concat, [1]], # cat backbone P3
  [-1, 3, C3, [256, False]], # 17+1 (P3/8-small)
  [-1, 1, Conv, [256, 3, 2]],
  [[-1, 15], 1, Concat, [1]], # cat head P4
  [-1, 3, C3, [512, False]], # 20+1 (P4/16-medium)
  [-1, 1, Conv, [512, 3, 2]],
  [[-1, 11], 1, Concat, [1]], # cat head P5
  [-1, 3, C3, [1024, False]], # 23+1 (P5/32-large)
  [[18, 21, 24], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
 ]
42 changes: 42 additions & 0 deletions models/yolov5s-se.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# Parameters
nc: 1 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
 [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
  [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
  [-1, 3, C3, [128]],
  [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
  [-1, 6, C3, [256]],
  [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
  [-1, 9, C3, [512]],
  [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
  [-1, 3, C3, [1024]],
  [-1, 1, SE, [1024]],
  [-1, 1, SPPF, [1024, 5]], # 9+1
 ]
# YOLOv5 v6.0 head
head:
 [[-1, 1, Conv, [512, 1, 1]],
  [-1, 1, nn.Upsample, [None, 2, 'nearest']],
  [[-1, 6], 1, Concat, [1]], # cat backbone P4
  [-1, 3, C3, [512, False]], # 13+1
  [-1, 1, Conv, [256, 1, 1]],
  [-1, 1, nn.Upsample, [None, 2, 'nearest']],
  [[-1, 4], 1, Concat, [1]], # cat backbone P3
  [-1, 3, C3, [256, False]], # 17+1 (P3/8-small)
  [-1, 1, Conv, [256, 3, 2]],
  [[-1, 15], 1, Concat, [1]], # cat head P4
  [-1, 3, C3, [512, False]], # 20+1 (P4/16-medium)
  [-1, 1, Conv, [512, 3, 2]],
  [[-1, 11], 1, Concat, [1]], # cat head P5
  [-1, 3, C3, [1024, False]], # 23+1 (P5/32-large)
  [[18, 21, 24], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
 ]
2 changes: 1 addition & 1 deletion utils/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ def tp_fp(self):
return tp[:-1], fp[:-1] # remove background class

@TryExcept("WARNING ⚠️ ConfusionMatrix plot failure")
def plot(self, normalize=True, save_dir="", names=()):
def plot(self, normalize=False, save_dir="", names=()):
"""Plots confusion matrix using seaborn, optional normalization; can save plot to specified directory."""
import seaborn as sn

Expand Down
2 changes: 1 addition & 1 deletion utils/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ def plot_images(images, targets, paths=None, fname="images.jpg", names=None):
color = colors(cls)
cls = names[cls] if names else cls
if labels or conf[j] > 0.25: # 0.25 conf thresh
label = f"{cls}" if labels else f"{cls} {conf[j]:.1f}"
label = f"{cls}" if labels else f"{cls} {conf[j]:.2f}"
annotator.box_label(box, label, color=color)
annotator.im.save(fname) # save

Expand Down
Loading
Loading