-
Notifications
You must be signed in to change notification settings - Fork 11
Description
Hello, may I ask how to improve the YOLOv8 model based on your code? For example, I have a Python file containing a CBAM module (a built-in module in YOLO) and an MSAA module (not built into YOLO; its code is attached below). Given that one of these modules is built in and the other is custom, how should I modify the YOLO model to use each of them, and then use your code to train the improved YOLO model in MATLAB 2024a? I have already modified the file "D:\anaconda3\Lib\site-packages\ultralytics\cfg\models\v8\yolov8.yaml" in the Python runtime path, but, for reasons I do not understand, there has been no change in the model structure or performance. I would also like to ask: is it possible to use my custom MYYOLO.yaml file for training based on your code? Thank you very much for your help and answers.
import torch
import torch.nn as nn
class ChannelAttentionModule(nn.Module):
    """Channel attention gate built from pooled channel descriptors.

    The input is reduced to one value per channel twice (adaptive average
    pooling and adaptive max pooling to a 1x1 map). Both descriptors pass
    through the same bottleneck of two 1x1 convolutions, are summed, and are
    squashed with a sigmoid. The result is a per-channel weight map; it is
    NOT multiplied into the input here, the caller does that.

    Args:
        in_channels: Number of channels of the input feature map.
        reduction: Divisor for the bottleneck width; the hidden layer has
            ``in_channels // reduction`` channels.
    """

    def __init__(self, in_channels, reduction=4):
        super().__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        # One bottleneck shared by both pooled descriptors.
        self.fc = nn.Sequential(
            nn.Conv2d(in_channels, in_channels // reduction, 1, bias=False),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels // reduction, in_channels, 1, bias=False),
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid channel weights for ``x``.

        Args:
            x: Feature map with ``in_channels`` channels (used as a 4-D
                ``(N, C, H, W)`` tensor by the callers in this file).

        Returns:
            Tensor of weights in ``[0, 1]`` with the spatial dimensions
            pooled down to 1x1.
        """
        avg_out = self.fc(self.avg_pool(x))
        max_out = self.fc(self.max_pool(x))
        out = avg_out + max_out
        return self.sigmoid(out)
class SpatialAttentionModule(nn.Module):
    """Spatial attention gate built from channel-wise statistics.

    The input is collapsed along the channel axis twice (mean and max), the
    two single-channel maps are stacked, and one convolution followed by a
    sigmoid turns them into a single-channel weight map. The map is NOT
    multiplied into the input here, the caller does that.

    Args:
        kernel_size: Side length of the convolution kernel. Padding is
            ``kernel_size // 2``, which keeps the spatial size unchanged for
            odd kernel sizes.
    """

    def __init__(self, kernel_size=7):
        super().__init__()
        # 2 input channels: the channel-wise mean map and max map.
        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=kernel_size // 2, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return a single-channel sigmoid weight map for ``x``.

        Args:
            x: Feature map whose channel axis is dimension 1.

        Returns:
            Tensor of weights in ``[0, 1]`` with one channel.
        """
        avg_out = torch.mean(x, dim=1, keepdim=True)
        # torch.max over a dim returns (values, indices); only values are used.
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        x = torch.cat([avg_out, max_out], dim=1)
        x = self.conv1(x)
        return self.sigmoid(x)
class MSAA(nn.Module):
    """Fuse three feature maps with multi-scale convolutions and attention.

    The three inputs are concatenated along the channel axis, projected down
    to a narrow width with a 1x1 convolution, refined by two parallel
    branches (channel attention, and summed 3x3/5x5/7x7 convolutions followed
    by spatial attention), and projected back up with a 1x1 convolution.

    Args:
        in_channels: Channel count of the concatenated input, i.e. the sum of
            the channel counts of ``x1``, ``x2`` and ``x4``.
        out_channels: Channel count of the output.
        factor: Divisor for the internal width; the inner layers use
            ``int(out_channels // factor)`` channels.
    """

    def __init__(self, in_channels, out_channels, factor=4.0):
        super().__init__()
        dim = int(out_channels // factor)
        self.down = nn.Conv2d(in_channels, dim, kernel_size=1, stride=1)
        # Padding is chosen so every branch keeps the spatial size.
        self.conv_3x3 = nn.Conv2d(dim, dim, kernel_size=3, stride=1, padding=1)
        self.conv_5x5 = nn.Conv2d(dim, dim, kernel_size=5, stride=1, padding=2)
        self.conv_7x7 = nn.Conv2d(dim, dim, kernel_size=7, stride=1, padding=3)
        self.spatial_attention = SpatialAttentionModule()
        self.channel_attention = ChannelAttentionModule(dim)
        self.up = nn.Conv2d(dim, out_channels, kernel_size=1, stride=1)

    def forward(self, x1, x2, x4):
        """Fuse ``x1``, ``x2`` and ``x4`` into one ``out_channels`` map.

        Args:
            x1: First feature map.
            x2: Second feature map.
            x4: Third feature map.
                All three are concatenated on dimension 1, so their other
                dimensions must match and their channel counts must add up
                to ``in_channels``.

        Returns:
            Tensor with ``out_channels`` channels.
        """
        # Inputs are fused by plain channel concatenation.
        x_fused = torch.cat([x1, x2, x4], dim=1)
        x_fused = self.down(x_fused)

        # Channel branch: re-weight the reduced features per channel.
        x_fused_c = x_fused * self.channel_attention(x_fused)

        # Spatial branch: sum three receptive-field sizes, then re-weight
        # per location.
        x_3x3 = self.conv_3x3(x_fused)
        x_5x5 = self.conv_5x5(x_fused)
        x_7x7 = self.conv_7x7(x_fused)
        x_fused_s = x_3x3 + x_5x5 + x_7x7
        x_fused_s = x_fused_s * self.spatial_attention(x_fused_s)

        x_out = self.up(x_fused_s + x_fused_c)
        return x_out
if __name__ == '__main__':
    # Smoke test: three 64-channel maps concatenate to 192 channels, which
    # matches MSAA(192, 64).
    # Use the GPU when one is present, otherwise fall back to the CPU so the
    # check still runs on machines without CUDA.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    x = torch.randn(4, 64, 128, 128, device=device)
    y = torch.randn(4, 64, 128, 128, device=device)
    z = torch.randn(4, 64, 128, 128, device=device)
    model = MSAA(192, 64).to(device)
    out = model(x, y, z)
    print(out.shape)