Reposted from the AI Studio project: https://aistudio.baidu.com/aistudio/projectdetail/3425218

After receiving the problem statement for this online smart-car competition, I did not rush into training, because I noticed the FPS requirement. That made me set aside DeepLabv3+, which I had used most often before: networks like SegNet, DeepLab, and PSPNet deliver good accuracy but run very slowly, even when the ResNet-style backbone is swapped for the MobileNet series. While searching for related material, I found a paper titled "Rethinking BiSeNet" (the STDC network). Its speed and accuracy are slightly better than the baseline BiSeNetV2, which is the reasoning behind improving my score by replacing the network structure. I was also pleasantly surprised to find that PaddleSeg already supports this network; kudos to Baidu for the intensive maintenance of Paddle.

#!unzip data/data125507/data_2022_baseline.zip
%cd work/
# Download PaddleSeg from Gitee.
# This step is already done in this baseline environment and does not need to be repeated.
# PaddleSeg 2.1 is provided as the baseline; you can also choose PaddleSeg 2.2, 2.3, or the develop branch.
!git clone https://gitee.com/paddlepaddle/PaddleSeg.git

# Check that the download succeeded
!ls /home/aistudio/work

# Install the dependencies in the AI Studio environment
!pip install paddleseg -i https://mirror.baidu.com/pypi/simple
import paddle
import paddle.nn as nn
import paddle.io as io
import numpy as np
import cv2
import os
import sys
%cd /home/aistudio/work/PaddleSeg/
!ls
# The official data already provides train_list.txt and val_list.txt files. To keep the
# competition dataset separate from the existing ones, a new dataset config file is created,
# named car2022.yml and stored at: PaddleSeg/configs/_base_/car2022.yml
# (a sketch of what it might contain follows below)
!ls /home/aistudio/work/PaddleSeg/configs/_base_/

# The semantic segmentation baseline uses BiSeNet; a bisenet_car_480x480.yml written for this
# competition already exists under PaddleSeg/configs/bisenet/. The configs for the custom
# RegSeg model live under PaddleSeg/configs/RegSeg/:
!ls /home/aistudio/work/PaddleSeg/configs/RegSeg/
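
As a point of reference for car2022.yml, here is a minimal sketch of what such a dataset config might contain. It is an assumption based on PaddleSeg's standard Dataset config schema; the dataset_root, list paths, target size, and class count are illustrative and must be adapted to the actual competition data.

# Hypothetical car2022.yml sketch; paths and values below are illustrative assumptions.
train_dataset:
  type: Dataset
  dataset_root: /home/aistudio/data            # assumed data location
  train_path: /home/aistudio/data/train_list.txt
  num_classes: 3                               # matches the RegSeg default below
  transforms:
    - type: Resize
      target_size: [480, 480]
    - type: Normalize
  mode: train

val_dataset:
  type: Dataset
  dataset_root: /home/aistudio/data
  val_path: /home/aistudio/data/val_list.txt
  num_classes: 3
  transforms:
    - type: Resize
      target_size: [480, 480]
    - type: Normalize
  mode: val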

While going through related material, I also found a very recent paper and attempted to reproduce it. This model reportedly performs even better than STDC2, but I only reproduced the network structure and did not run actual training experiments. The reproduction is presented along with this article.

In November 2021, a paper on rethinking dilated convolution was published (RegSeg), which makes some improvements on top of BiSeNetV2. This task requires both real-time speed and accuracy: the paper notes that the DeepLab series runs very slowly, while real-time networks do not reach the required accuracy. The MobileNetV3 authors observed that even after halving the channel count of the last convolution layer, accuracy barely changes, which hints at the redundancy of ImageNet-pretrained backbones.

Inspired by the Y block structure of the ResNeXt network, the paper designs a D block that only needs to be repeated, without stacking other tricks; this structure is shown to be real-time while still maintaining good accuracy.

The network is split into a Backbone and a Decoder, with the Backbone composed of many stacked D blocks. The backbone feature-extraction structure can be seen in the RegSeg reproduction below.

%cd ~/work/PaddleSeg

Secondary development with PaddleSeg:

A few things to note when plugging in your own RegSeg:
1. Add a .py file under the cloned work/PaddleSeg/paddleseg/models
2. Decorate the model class in your .py file with @manager.MODELS.add_component
3. In paddleseg/models/__init__.py, add from .xxx import * (where xxx is your file name)
4. Then you only need to write your own yml config and loss, and the model is ready to use (see the config sketch below)
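
For step 4, here is a minimal sketch of what a RegSeg training config could look like. This is an assumption assembled from PaddleSeg's usual config layout; the _base_ include, batch size, iteration count, optimizer, and loss values are illustrative, not the settings actually used in this project.

# Hypothetical PaddleSeg/configs/RegSeg/regseg_car_480x480.yml sketch;
# values below are illustrative assumptions.
_base_: ../_base_/car2022.yml

batch_size: 4
iters: 10000

model:
  type: RegSeg          # resolved via @manager.MODELS.add_component
  num_classes: 3

loss:
  types:
    - type: CrossEntropyLoss
  coef: [1]

optimizer:
  type: sgd
  momentum: 0.9
  weight_decay: 4.0e-5

lr_scheduler:
  type: PolynomialDecay
  learning_rate: 0.01
  end_lr: 0
  power: 0.9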
# RegSeg reproduction
import paddle
import paddle.nn as nn
import paddle.io as io
import numpy as np
import cv2
import os
import sys
from paddleseg.models import layers
from paddleseg.cvlibs import manager
from paddleseg.utils import utils


class Stem(nn.Layer):
    def __init__(self,in_dim=3,out_dim=32):
        super(Stem,self).__init__()
        self.conv_bn_relu=nn.Sequential(
            nn.Conv2D(in_channels=in_dim,out_channels=out_dim,kernel_size=3,stride=2,padding=1),
            nn.BatchNorm2D(out_dim),
            nn.ReLU()
        )
    
    def forward(self,inputs):
        outputs=self.conv_bn_relu(inputs)
        return outputs

class Conv_Bn_Relu(nn.Layer):
    def __init__(self,in_dim,out_dim,kernel,stride,pad):
        super(Conv_Bn_Relu,self).__init__()
        self.conv_bn_relu=nn.Sequential(
            nn.Conv2D(in_channels=in_dim,out_channels=out_dim,kernel_size=kernel,stride=stride,padding=pad),
            nn.BatchNorm2D(out_dim),
            nn.ReLU()
        )
    def forward(self,inputs):
        outputs=self.conv_bn_relu(inputs)
        return outputs

class DepthWise_Conv(nn.Layer):
    # grouped convolution branch (groups=16 rather than groups=in_channels, so not strictly depthwise)
    def __init__(self,in_channels,kernel_size=3,stride=1,padding=1,groups=16,dilate=1):
        super(DepthWise_Conv,self).__init__()
        self.conv=nn.Sequential(
            nn.Conv2D(in_channels=in_channels,out_channels=in_channels,kernel_size=kernel_size,stride=stride,padding=padding,dilation=dilate,groups=groups),
            nn.BatchNorm2D(in_channels),
            nn.ReLU()
        )
        
    def forward(self, inputs):
        x=self.conv(inputs)
        return x

class SEBlock(nn.Layer):
    '''
    Squeeze-and-Excitation attention module
    '''
    def __init__(self,in_channel,reduce=4):
        super(SEBlock,self).__init__()
        self.avg_pool=nn.AdaptiveAvgPool2D(output_size=1)  # global average pooling
        self.flatten=nn.Flatten()
        self.fc1=nn.Linear(in_features=in_channel,out_features=in_channel//reduce)  # squeeze FC layer after global pooling
        # expand the squeezed features back to the original channel count
        self.fc2=nn.Linear(in_features=in_channel//reduce,out_features=in_channel)
        self.relu=nn.ReLU()
        self.hsigmoid=nn.Hardsigmoid()
    def forward(self,inputs):
        x=self.avg_pool(inputs)  # B,C,1,1
        x=self.flatten(x)        # B,C
        x=self.fc1(x)
        x=self.relu(x)
        x=self.fc2(x)
        x=self.hsigmoid(x)  # per-channel attention weights
        x=x.reshape((inputs.shape[0],inputs.shape[1],1,1))
        output=x*inputs
        return output

class DBlock(nn.Layer):
    def __init__(self,in_dim,out_dim,stride,d2):
        super(DBlock,self).__init__()
        #all block g=16
        self.g=16
        self.stride=stride
        self.conv_1x1_in=Conv_Bn_Relu(in_dim=in_dim,out_dim=256,kernel=1,stride=1,pad=0)
        dkernel=d2*2+1
        same_pad=(dkernel-stride+1)//2
        self.split_DW1=DepthWise_Conv(in_channels=256,kernel_size=3,stride=stride,padding=1,dilate=1,groups=self.g)
        self.split_DW2=DepthWise_Conv(in_channels=256,kernel_size=3,stride=stride,padding=same_pad,dilate=d2,groups=self.g)
        self.se=SEBlock(in_channel=512,reduce=4)
        if stride==1: 
            self.conv_1x1_out=Conv_Bn_Relu(in_dim=512,out_dim=in_dim,kernel=1,stride=1,pad=0)
        else:
            self.conv_1x1_out=Conv_Bn_Relu(in_dim=512,out_dim=out_dim,kernel=1,stride=1,pad=0)
        
        self.avgpool=nn.AvgPool2D(kernel_size=2,stride=2)
        self.point_Conv=Conv_Bn_Relu(in_dim=in_dim,out_dim=out_dim,kernel=1,stride=1,pad=0)
    def forward(self,inputs):
        h=inputs
        x=self.conv_1x1_in(inputs)
        # two parallel grouped-conv branches: dilation 1 and dilation d2
        x1=self.split_DW1(x)
        x2=self.split_DW2(x)
        concat_=paddle.concat([x1,x2],axis=1)
        se=self.se(concat_)
        out1=self.conv_1x1_out(se)
        # shortcut branch: downsample and project when the block strides
        if(self.stride==2):
            h=self.avgpool(h)
            h=self.point_Conv(h)
        outputs=h+out1
        return outputs

class DBlock_last(nn.Layer):
    def __init__(self,in_dim,out_dim,stride=1,d2=14):
        super(DBlock_last,self).__init__()
        #all block g=16
        self.g=16
        self.stride=stride
        self.conv_1x1_in=Conv_Bn_Relu(in_dim=in_dim,out_dim=256,kernel=1,stride=1,pad=0)
        dkernel=d2*2+1
        same_pad=(dkernel-stride+1)//2
        self.split_DW1=DepthWise_Conv(in_channels=256,kernel_size=3,stride=stride,padding=1,dilate=1,groups=self.g)
        self.split_DW2=DepthWise_Conv(in_channels=256,kernel_size=3,stride=stride,padding=same_pad,dilate=d2,groups=self.g)
        self.se=SEBlock(in_channel=512,reduce=4)
        self.conv_1x1_out=Conv_Bn_Relu(in_dim=512,out_dim=out_dim,kernel=1,stride=1,pad=0)
        self.conv=Conv_Bn_Relu(in_dim=in_dim,out_dim=out_dim,kernel=1,stride=1,pad=0)
    def forward(self,inputs):
        h=inputs
        x=self.conv_1x1_in(inputs)
        x1=self.split_DW1(x)
        x2=self.split_DW2(x)
        concat_=paddle.concat([x1,x2],axis=1)
        se=self.se(concat_)
        out1=self.conv_1x1_out(se)
        # the shortcut always projects to out_dim in the last block
        h=self.conv(h)
        outputs=h+out1
        return outputs

@manager.MODELS.add_component
class RegSeg(nn.Layer):
    def __init__(self,num_classes=3,pretrained=None):
        super(RegSeg,self).__init__()
        self.pretrained=pretrained  # stored so init_weight() can load pretrained weights
        self.out_dims=[32,48,128,256,256,256,256,320]
        self.strides=[2,2,2,2,1,1,1,1]
        self.repeat= [3,2,1,4,6]
        self.dilate_rate=[1,1,1,2,4,14,14]
        self.stem=Stem(in_dim=3,out_dim=self.out_dims[0])
        self.dblock1=DBlock(in_dim=self.out_dims[0],out_dim=self.out_dims[1],stride=self.strides[1],d2=1)
        self.dblock2=DBlock(in_dim=self.out_dims[1],out_dim=self.out_dims[2],stride=self.strides[2],d2=1)
        self.dblock2_re=DBlock(in_dim=self.out_dims[2],out_dim=self.out_dims[2],stride=1,d2=1)
        # downsampled 8x from the input at this point
        self.dblock3=DBlock(in_dim=self.out_dims[2],out_dim=self.out_dims[3],stride=self.strides[3],d2=1)
        self.dblock3_re=DBlock(in_dim=self.out_dims[3],out_dim=self.out_dims[3],stride=1,d2=1)
        # downsampled 16x from the input at this point
        self.dblock4=DBlock(in_dim=self.out_dims[3],out_dim=self.out_dims[4],stride=self.strides[4],d2=2)
        self.dblock5=DBlock(in_dim=self.out_dims[4],out_dim=self.out_dims[5],stride=self.strides[5],d2=4)
        self.dblock5_re=DBlock(in_dim=self.out_dims[5],out_dim=self.out_dims[5],stride=1,d2=4)
        self.dblock6=DBlock(in_dim=self.out_dims[5],out_dim=self.out_dims[6],stride=self.strides[6],d2=14)
        self.dblock6_re=DBlock(in_dim=self.out_dims[6],out_dim=self.out_dims[6],stride=1,d2=14)
        self.dblock_last=DBlock_last(in_dim=self.out_dims[6],out_dim=self.out_dims[7],stride=1,d2=14)

        self._16_dimconv=Conv_Bn_Relu(in_dim=320,out_dim=128,kernel=1,stride=1,pad=0)
        self._8_dimconv=Conv_Bn_Relu(in_dim=256,out_dim=128,kernel=1,stride=1,pad=0)
        self.conv2=Conv_Bn_Relu(in_dim=128,out_dim=64,kernel=3,stride=1,pad=1)
        self._4_dimconv=Conv_Bn_Relu(in_dim=128,out_dim=8,kernel=1,stride=1,pad=0)
        self.conv_last1=Conv_Bn_Relu(in_dim=72,out_dim=64,kernel=3,stride=1,pad=0)
        self.conv_last2=nn.Conv2D(in_channels=64,out_channels=num_classes,kernel_size=1,stride=1,padding=0)
        self.init_weight()
    def forward(self,inps):
        x=self.stem(inps)
        x=self.dblock1(x)
        x=self.dblock2(x)
        # note: each *_re block is a single module instance reused in a loop,
        # so the repeated applications share weights
        for i in range(self.repeat[0]):
            x=self.dblock2_re(x)
        out1_4=x
        x=self.dblock3(x)
        for i in range(self.repeat[1]):
            x=self.dblock3_re(x)
        out2_8=x
        x=self.dblock4(x)
        x=self.dblock5(x)
        for i in range(self.repeat[3]):
            x=self.dblock5_re(x)
        x=self.dblock6(x)
        for i in range(self.repeat[4]):
            x=self.dblock6_re(x)
        x=self.dblock_last(x)
        out_16=x
        # project the 1/16 and 1/8 feature maps to a common number of channels
        out_up_16=self._16_dimconv(out_16)
        out_up_8=paddle.nn.functional.interpolate(out_up_16,size=[out2_8.shape[2],out2_8.shape[3]])
        out_sum_8=self._8_dimconv(out2_8)
        out_sum_8=out_sum_8+out_up_8
        out_sum_8=self.conv2(out_sum_8)
        out_sum_8=paddle.nn.functional.interpolate(out_sum_8,size=[out1_4.shape[2],out1_4.shape[3]])
        out_4_dimconv=self._4_dimconv(out1_4)
        concat_last=paddle.concat([out_sum_8,out_4_dimconv],axis=1)
        outputs=self.conv_last1(concat_last)
        outputs=self.conv_last2(outputs)
        # upsample back to the input resolution (default nearest interpolation)
        outputs=paddle.nn.functional.interpolate(outputs,size=[inps.shape[2],inps.shape[3]])
        # PaddleSeg's training pipeline expects a list of logit tensors
        return [outputs]
    def init_weight(self):
        if self.pretrained is not None:
            utils.load_entire_model(self, self.pretrained)
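
Since only the structure was reproduced and no training was run, a quick forward-pass sanity check helps confirm the sketch is at least runnable. The snippet below is my own assumed usage, not part of the original project; the 480x480 input is an assumption matching the competition input size used by the configs above.

# Assumed smoke test for the RegSeg reproduction above (not from the original project).
model = RegSeg(num_classes=3)
model.eval()
dummy = paddle.randn([1, 3, 480, 480])  # batch of one 480x480 RGB image
logits = model(dummy)[0]                # forward returns a list of logits
print(logits.shape)                     # expect [1, 3, 480, 480]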

Model training (STDC2)

!python train.py --config configs/stdcseg/stdc.yml --do_eval --save_interval 400

The remaining steps are omitted; they are the same as in the official baseline.
