Reproducing EfficientNet
Overall structure of EfficientNet-B0, where Conv = (Conv + BN + Swish). If a module is repeated two or more times, stride = 2 applies only to the first repetition; all subsequent repetitions of that module use stride = 1 (a small sketch after the table illustrates this).
+-------+--------------------------+------------+--------------+---------+--------+
| Stage | Module | input_size | out_channels | repeats | stride |
+-------+--------------------------+------------+--------------+---------+--------+
| 1 | Conv(3x3) | 224x224 | 32 | 1 | 2 |
| 2 | MBConv1,k3x3 | 112x112 | 16 | 1 | 1 |
| 3 | MBConv6,k3x3 | 112x112 | 24 | 2 | 2 |
| 4 | MBConv6,k5x5 | 56x56 | 40 | 2 | 2 |
| 5 | MBConv6,k3x3 | 28x28 | 80 | 3 | 2 |
| 6 | MBConv6,k5x5 | 14x14 | 112 | 3 | 1 |
| 7 | MBConv6,k5x5 | 14x14 | 192 | 4 | 2 |
| 8 | MBConv6,k3x3 | 7x7 | 320 | 1 | 1 |
| 9 | Conv(1x1) & Pooling & FC | 7x7 | 1280 | 1 | None |
+-------+--------------------------+------------+--------------+---------+--------+
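As a quick illustration of the repeat/stride rule above, the minimal sketch below (a hypothetical helper, not part of the network code) unrolls a stage configuration into per-layer strides:

def expand_stage(repeats, stride):
    # stride applies only to the first repetition; the rest use stride 1
    return [stride if i == 0 else 1 for i in range(repeats)]

print(expand_stage(repeats=2, stride=2))  # [2, 1]        (e.g. Stage 3)
print(expand_stage(repeats=4, stride=2))  # [2, 1, 1, 1]  (e.g. Stage 7)

The same rule shows up later in create_features as `stride = stride if layer == 0 else 1`.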
MBConv Module
MBConv6: the first 1x1 expansion convolution has 6 times as many filters as the input feature map has channels.
MBConv1: the first 1x1 expansion convolution is omitted, i.e. the MBConv blocks in Stage 2 have no expansion layer.
Shortcut: exists if and only if the input and output of the MBConv block have the same shape.
MBConv: a MobileNetV3-style conv block

image ---> Conv(1x1, cin->cin*n) --> Depthwise --> SE --> ConvBN(1x1, cin*n->cout) --> dropout --> + ---> output
  |              expand                                          project                           |
  |------------------------------------------------------------------------------------------------|

BN + Swish follow both the expansion conv and the depthwise conv; the projection conv is followed by BN only (no activation).
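To make the shortcut rule concrete, here is a minimal sketch (hypothetical helper, using numbers from Stage 3 of the table) of the shape check that later becomes `use_residual`:

def has_shortcut(in_channels, out_channels, stride):
    # shortcut exists iff channels and spatial size are unchanged
    return in_channels == out_channels and stride == 1

# Stage 3, first repetition: 16 -> 24 channels, stride 2 -> no shortcut
print(has_shortcut(16, 24, 2))  # False
# Stage 3, second repetition: 24 -> 24 channels, stride 1 -> shortcut
print(has_shortcut(24, 24, 1))  # True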
Configuration parameters
import torch
import torch.nn as nn
from math import ceil

base_model = [
    # expand_ratio, channels, repeats, stride, kernel_size
    [1, 16, 1, 1, 3],
    [6, 24, 2, 2, 3],
    [6, 40, 2, 2, 5],
    [6, 80, 3, 2, 3],
    [6, 112, 3, 1, 5],
    [6, 192, 4, 2, 5],
    [6, 320, 1, 1, 3],
]

phi_values = {
    # tuple of: (phi_value, resolution, drop_rate)
    # depth = alpha ** phi, width = beta ** phi (alpha = 1.2, beta = 1.1)
    "b0": (0, 224, 0.2),
    "b1": (0.5, 240, 0.2),
    "b2": (1, 260, 0.3),
    "b3": (2, 300, 0.3),
    "b4": (3, 380, 0.4),
    "b5": (4, 456, 0.4),
    "b6": (5, 528, 0.5),
    "b7": (6, 600, 0.5),
}
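For reference, the compound-scaling factors follow from phi as depth = alpha ** phi and width = beta ** phi, with alpha = 1.2 and beta = 1.1 taken from the EfficientNet paper; the short sketch below mirrors what calculate_factors does further down:

alpha, beta = 1.2, 1.1  # depth / width coefficients from the EfficientNet paper

for version, (phi, res, drop_rate) in phi_values.items():
    depth_factor = alpha ** phi
    width_factor = beta ** phi
    print(f"{version}: depth x{depth_factor:.2f}, width x{width_factor:.2f}, resolution {res}")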
Convolution + BN + Swish

# CBS: Conv + BN + SiLU
class CNNBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, groups=1):
        super(CNNBlock, self).__init__()
        # groups=1: normal conv, groups=in_channels: depthwise conv
        self.cnn = nn.Conv2d(
            in_channels, out_channels, kernel_size, stride, padding,
            groups=groups, bias=False,
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.silu = nn.SiLU()  # SiLU <-> Swish

    def forward(self, x):
        x = self.cnn(x)
        x = self.bn(x)
        x = self.silu(x)
        return x
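A quick shape check of CNNBlock (assuming the definitions above); the same block covers both a normal convolution and a depthwise convolution via groups:

x = torch.randn(2, 32, 112, 112)
normal = CNNBlock(32, 64, kernel_size=3, stride=1, padding=1)                # groups=1
depthwise = CNNBlock(32, 32, kernel_size=3, stride=2, padding=1, groups=32)  # one filter per channel
print(normal(x).shape)     # torch.Size([2, 64, 112, 112])
print(depthwise(x).shape)  # torch.Size([2, 32, 56, 56])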
SE attention mechanism

# SE attention
class SqueezeExcitation(nn.Module):
    def __init__(self, in_channels, reduced_dim):
        super(SqueezeExcitation, self).__init__()
        self.se = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),                 # [B, C, H, W] -> [B, C, 1, 1]
            nn.Conv2d(in_channels, reduced_dim, 1),  # [B, r, 1, 1]
            nn.SiLU(),
            nn.Conv2d(reduced_dim, in_channels, 1),  # [B, C, 1, 1]
            nn.Sigmoid(),
        )

    def forward(self, x):
        return x * self.se(x)  # [B, C, H, W] * [B, C, 1, 1]
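A quick shape check (assuming the definitions above): the SE block only rescales channels, so the output shape equals the input shape. The numbers below match Stage 3's first MBConv, where hidden_dim = 16 * 6 = 96 and reduced_dim = 16 / 4 = 4:

se = SqueezeExcitation(in_channels=96, reduced_dim=4)
x = torch.randn(2, 96, 56, 56)
print(se(x).shape)  # torch.Size([2, 96, 56, 56])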
MBConv
class InvertedResidualBlock(nn.Module):
    def __init__(
        self,
        in_channels, out_channels, kernel_size, stride, padding, expand_ratio,
        reduction=4,        # squeeze excitation
        survival_prob=0.8,  # for stochastic depth
    ):
        super(InvertedResidualBlock, self).__init__()
        self.survival_prob = survival_prob
        self.use_residual = in_channels == out_channels and stride == 1
        hidden_dim = in_channels * expand_ratio
        self.expand = in_channels != hidden_dim
        reduced_dim = int(in_channels / reduction)

        # expansion (1x1 conv); when expand_ratio == 1 this CNNBlock is skipped
        if self.expand:
            self.expand_conv = CNNBlock(
                in_channels, hidden_dim, kernel_size=1, stride=1, padding=0,
            )

        self.conv = nn.Sequential(
            CNNBlock(hidden_dim, hidden_dim, kernel_size, stride, padding, groups=hidden_dim),
            SqueezeExcitation(hidden_dim, reduced_dim),
            nn.Conv2d(hidden_dim, out_channels, 1, bias=False),
            nn.BatchNorm2d(out_channels),
        )

    # stochastic depth: randomly drop (or keep) the block's output during training;
    # it is only applied to blocks that use the shortcut connection
    def stochastic_depth(self, x):
        if not self.training:
            return x
        binary_tensor = torch.rand(x.shape[0], 1, 1, 1, device=x.device) < self.survival_prob
        return torch.div(x, self.survival_prob) * binary_tensor

    def forward(self, inputs):
        x = self.expand_conv(inputs) if self.expand else inputs
        if self.use_residual:
            x = self.conv(x)
            x = self.stochastic_depth(x)
            x = x + inputs
            return x
        else:
            return self.conv(x)
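A quick sanity check of the block (assuming the definitions above, Stage 3 numbers): the first repetition changes channels and stride, so it has no shortcut; the second repetition keeps the shape and uses the shortcut plus stochastic depth:

x = torch.randn(2, 16, 112, 112)
first = InvertedResidualBlock(16, 24, kernel_size=3, stride=2, padding=1, expand_ratio=6)
repeat = InvertedResidualBlock(24, 24, kernel_size=3, stride=1, padding=1, expand_ratio=6)
print(first.use_residual, repeat.use_residual)  # False True
y = first(x)
print(y.shape)          # torch.Size([2, 24, 56, 56])
print(repeat(y).shape)  # torch.Size([2, 24, 56, 56])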
EfficientNet
class EfficientNet(nn.Module):
    def __init__(self, version, num_classes):
        super(EfficientNet, self).__init__()
        width_factor, depth_factor, dropout_rate = self.calculate_factors(version)
        last_channels = ceil(1280 * width_factor)
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.features = self.create_features(width_factor, depth_factor, last_channels)
        self.classifier = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(last_channels, num_classes),
        )

    def calculate_factors(self, version, alpha=1.2, beta=1.1):
        phi, res, drop_rate = phi_values[version]
        depth_factor = alpha ** phi
        width_factor = beta ** phi
        return width_factor, depth_factor, drop_rate

    def create_features(self, width_factor, depth_factor, last_channels):
        channels = int(32 * width_factor)
        features = [CNNBlock(3, channels, 3, stride=2, padding=1)]
        in_channels = channels

        for expand_ratio, channels, repeats, stride, kernel_size in base_model:
            # round the scaled channel count up to a multiple of 4
            out_channels = 4 * ceil(int(channels * width_factor) / 4)
            layer_repeats = ceil(repeats * depth_factor)

            for layer in range(layer_repeats):
                features.append(
                    InvertedResidualBlock(
                        in_channels,
                        out_channels,
                        expand_ratio=expand_ratio,
                        stride=stride if layer == 0 else 1,
                        kernel_size=kernel_size,
                        padding=kernel_size // 2,
                    )
                )
                in_channels = out_channels

        features.append(
            CNNBlock(in_channels, last_channels, kernel_size=1, stride=1, padding=0)
        )
        return nn.Sequential(*features)

    def forward(self, x):
        x = self.features(x)
        x = self.pool(x)
        x = self.classifier(x.view(x.shape[0], -1))
        return x
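To see the effect of compound scaling, one can compare parameter counts across a few versions (a rough sketch assuming the class above; the exact counts depend on the channel rounding in create_features and are not the paper's official numbers):

for version in ["b0", "b1", "b2"]:
    model = EfficientNet(version=version, num_classes=1000)
    n_params = sum(p.numel() for p in model.parameters())
    print(f"{version}: {n_params / 1e6:.1f}M parameters")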
test
def test():
    device = "cuda" if torch.cuda.is_available() else "cpu"
    version = "b0"
    phi, res, drop_rate = phi_values[version]
    num_examples, num_classes = 4, 10
    x = torch.randn((num_examples, 3, res, res)).to(device)
    model = EfficientNet(version=version, num_classes=num_classes).to(device)
    print(model(x).shape)  # expected: torch.Size([4, 10])


test()