Improve yolo model #26

@listening510

Description

Hello, may I ask how to improve the YOLOv8 model based on your code? For example, I have a Python file containing a CBAM module (which is built into YOLO) and an MSAA module (which is not built into YOLO; its code is attached below). How should I modify the YOLO model with these two modules, given that they are in different situations, and then use your code to train the improved model in MATLAB R2024a? I don't know why, but even though I modified the file "D:\anaconda3\Lib\site-packages\ultralytics\cfg\models\v8\yolov8.yaml" in my Python environment, the model structure and performance did not change. I would also like to ask whether it is possible to train from my custom MYYOLO.yaml file using your code. Thank you very much for your help and answers.
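
For reference, this is roughly how I would expect a custom YAML to be used on the Python side (a minimal sketch, assuming a standard Ultralytics install; MYYOLO.yaml and mydata.yaml are placeholder names, and I assume a custom module such as MSAA would also need to be imported and registered in ultralytics/nn/tasks.py before the YAML can resolve it):

from ultralytics import YOLO

# Build the model from the custom architecture file instead of the packaged
# yolov8.yaml; passing a .yaml path constructs an untrained model.
model = YOLO("MYYOLO.yaml")

# Optionally transfer weights from an official checkpoint before training,
# e.g. model = YOLO("MYYOLO.yaml").load("yolov8n.pt")

# Train against a dataset YAML (placeholder path).
model.train(data="mydata.yaml", epochs=100, imgsz=640)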

import torch
import torch.nn as nn


class ChannelAttentionModule(nn.Module):
    def __init__(self, in_channels, reduction=4):
        super(ChannelAttentionModule, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        # Shared MLP applied to both the average- and max-pooled descriptors
        self.fc = nn.Sequential(
            nn.Conv2d(in_channels, in_channels // reduction, 1, bias=False),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels // reduction, in_channels, 1, bias=False)
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        avg_out = self.fc(self.avg_pool(x))
        max_out = self.fc(self.max_pool(x))
        out = avg_out + max_out
        return self.sigmoid(out)


class SpatialAttentionModule(nn.Module):
    def __init__(self, kernel_size=7):
        super(SpatialAttentionModule, self).__init__()
        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=kernel_size // 2, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # Channel-wise average and max maps, concatenated and convolved
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        x = torch.cat([avg_out, max_out], dim=1)
        x = self.conv1(x)
        return self.sigmoid(x)


class MSAA(nn.Module):
    def __init__(self, in_channels, out_channels, factor=4.0):
        super(MSAA, self).__init__()
        dim = int(out_channels // factor)
        self.down = nn.Conv2d(in_channels, dim, kernel_size=1, stride=1)
        self.conv_3x3 = nn.Conv2d(dim, dim, kernel_size=3, stride=1, padding=1)
        self.conv_5x5 = nn.Conv2d(dim, dim, kernel_size=5, stride=1, padding=2)
        self.conv_7x7 = nn.Conv2d(dim, dim, kernel_size=7, stride=1, padding=3)
        self.spatial_attention = SpatialAttentionModule()
        self.channel_attention = ChannelAttentionModule(dim)
        self.up = nn.Conv2d(dim, out_channels, kernel_size=1, stride=1)

    def forward(self, x1, x2, x4):
        # x_1_2_fusion = self.fusion_1x2(x1, x2)
        # x_1_4_fusion = self.fusion_1x4(x1, x4)
        # x_fused = x_1_2_fusion + x_1_4_fusion
        x_fused = torch.cat([x1, x2, x4], dim=1)
        x_fused = self.down(x_fused)
        # Channel-attention branch
        x_fused_c = x_fused * self.channel_attention(x_fused)
        # Multi-scale spatial branch: 3x3, 5x5 and 7x7 convolutions
        x_3x3 = self.conv_3x3(x_fused)
        x_5x5 = self.conv_5x5(x_fused)
        x_7x7 = self.conv_7x7(x_fused)
        x_fused_s = x_3x3 + x_5x5 + x_7x7
        x_fused_s = x_fused_s * self.spatial_attention(x_fused_s)

        x_out = self.up(x_fused_s + x_fused_c)

        return x_out


if __name__ == '__main__':
    x = torch.randn(4, 64, 128, 128).cuda()
    y = torch.randn(4, 64, 128, 128).cuda()
    z = torch.randn(4, 64, 128, 128).cuda()
    model = MSAA(192, 64).cuda()
    out = model(x, y, z)
    print(out.shape)
