引入

  • 边缘检测系列第 5 弹,本次继续介绍经典的边缘检测模型

  • Crisp Edge Detection(CED)模型是前面介绍过的 HED 模型的另一种改进模型

  • CED 模型利用自上而下的反向细化路径,并逐渐增加特征图的分辨率以生成清晰的边缘

  • 因为官方没有提供预训练模型,所以本次仅简单介绍一下模型的结构和代码实现

参考资料

效果参考

  • 论文效果图:

模型架构

  • CED 模型总体基于 HED 模型改造而来,其中做了如下几个改进:

    • 将模型中的上采样操作从转置卷积插值更换为 PixelShuffle

    • 添加了反向细化路径,即一个反向的从高层级特征逐步往低层级特征的边缘细化路径

    • 没有多层级输出,最终的输出为融合了各层级的特征的边缘检测结果

  • 架构图如下

PixelShuffle

代码实现

骨干网络

  • 依旧是 VGG16 这个非常经典的骨干网络
from typing import List, Optional

import paddle
import paddle.nn as nn
from paddle.utils.download import get_weights_path_from_url


# Empty on purpose or by inheritance from the upstream file: with this value
# `from <module> import *` exports no names.
__all__ = []

# Maps an architecture name to a (weights URL, checksum string) pair. Both
# entries are passed, in this order, to get_weights_path_from_url when
# pretrained weights are requested (the second is presumably the file's MD5
# checksum -- confirm against the Paddle download helper).
model_urls = {
    'vgg16': ('https://paddle-hapi.bj.bcebos.com/models/vgg16.pdparams',
              '89bbffc0f87d260be9b8cdc169c991c4')
}


class VGG(nn.Layer):
    """VGG feature extractor that returns every post-activation feature map.

    Adapted from
    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_.
    This variant holds only the convolutional feature stack; it defines no
    pooling head and no fully-connected classifier.

    Args:
        features (nn.Layer): Iterable stack of layers, e.g. the
            ``nn.Sequential`` built by ``make_layers``.

    Attributes:
        features (nn.Layer): The layer stack passed to the constructor.
        feat_channels (list): Output channel count of each ``nn.Conv2D``
            found in ``features``, in order of appearance.

    Examples:
        .. code-block:: python

            vgg11_cfg = [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M']

            features = make_layers(vgg11_cfg)

            vgg11 = VGG(features)

            # one tensor per ReLU layer in ``features``
            feats = vgg11(paddle.randn((1, 3, 224, 224)))

    """

    def __init__(self, features):
        super().__init__()
        self.features = features

        # Record the width of every convolution so callers can size their
        # heads by index. Entry i lines up with output i of forward()
        # whenever each convolution is followed by exactly one ReLU.
        conv_widths = []
        for sub_layer in features:
            if isinstance(sub_layer, nn.Conv2D):
                conv_widths.append(sub_layer._out_channels)
        self.feat_channels = conv_widths

    def forward(self, x):
        """Run ``x`` through the stack and collect the output of every ReLU.

        Args:
            x (paddle.Tensor): Input handed to the first layer.

        Returns:
            list: The tensor produced by each ``nn.ReLU``, in execution order.
        """
        relu_outputs = []
        hidden = x
        for sub_layer in self.features:
            hidden = sub_layer(hidden)
            # Only activations are exposed; conv / pool / norm outputs are
            # intermediate values.
            if isinstance(sub_layer, nn.ReLU):
                relu_outputs.append(hidden)

        return relu_outputs


def make_layers(cfg, batch_norm=False):
    """Build the VGG convolutional stack described by ``cfg``.

    Args:
        cfg (list): Layout entries. ``'M'`` adds a 2x2 max-pool with stride
            2; any other value is used as the output channel count of a 3x3
            convolution (padding 1) that is followed by a ReLU.
        batch_norm (bool): If True, insert ``nn.BatchNorm2D`` between each
            convolution and its ReLU. Default: False.

    Returns:
        nn.Sequential: The layers in the order given by ``cfg``. The first
        convolution takes 3 input channels.
    """
    stack = []
    prev_channels = 3
    for entry in cfg:
        if entry == 'M':
            stack.append(nn.MaxPool2D(kernel_size=2, stride=2))
            continue

        stack.append(nn.Conv2D(prev_channels, entry, kernel_size=3, padding=1))
        if batch_norm:
            stack.append(nn.BatchNorm2D(entry))
        stack.append(nn.ReLU())
        # The next convolution consumes what this one produced.
        prev_channels = entry
    return nn.Sequential(*stack)


# Layer layouts consumed by make_layers: an int is the output width of a
# convolution, 'M' is a max-pool. Each row's comment lists the 0-based
# positions of that row's convolutions among all convolutions (the
# "return index" of the matching feature map).
cfgs = {
    'D': [
        64, 64, 'M',          # return_idx 0, 1
        128, 128, 'M',        # return_idx 2, 3
        256, 256, 256, 'M',   # return_idx 4, 5, 6
        512, 512, 512, 'M',   # return_idx 7, 8, 9
        512, 512, 512,        # return_idx 10, 11, 12
    ],
}


def _vgg(arch, cfg, batch_norm, pretrained, **kwargs):
    """Assemble a VGG model and optionally load its pretrained weights.

    Args:
        arch (str): Key into ``model_urls`` used to locate pretrained weights.
        cfg (str): Key into ``cfgs`` selecting the layer layout.
        batch_norm (bool): Forwarded to ``make_layers``.
        pretrained (bool): If truthy, fetch and load the weights registered
            for ``arch``.
        **kwargs: Forwarded to the ``VGG`` constructor.

    Returns:
        VGG: The constructed model.

    Raises:
        AssertionError: If ``pretrained`` is truthy and ``arch`` has no entry
            in ``model_urls``.
    """
    feature_stack = make_layers(cfgs[cfg], batch_norm=batch_norm)
    model = VGG(feature_stack, **kwargs)

    if not pretrained:
        return model

    assert arch in model_urls, "{} model do not have a pretrained model now, you should set pretrained=False".format(
        arch)
    url_entry = model_urls[arch]
    weight_path = get_weights_path_from_url(url_entry[0], url_entry[1])

    state = paddle.load(weight_path)
    model.load_dict(state)
    return model


def vgg16(pretrained=False, batch_norm=False, **kwargs):
    """Build the VGG 16-layer feature extractor (layout ``'D'``).

    Args:
        pretrained (bool): If True, load the pretrained weights registered in
            ``model_urls`` for the selected variant. Default: False.
        batch_norm (bool): If True, build the variant with batch
            normalization; weights are then looked up under ``'vgg16_bn'``.
            Default: False.
        **kwargs: Forwarded through ``_vgg`` to the ``VGG`` constructor.

    Returns:
        VGG: The constructed model.

    Examples:
        .. code-block:: python

            # build model
            model = vgg16()

            # build vgg16 model with batch_norm
            model = vgg16(batch_norm=True)
    """
    suffix = '_bn' if batch_norm else ''
    return _vgg('vgg16' + suffix, 'D', batch_norm, pretrained, **kwargs)

CED 模块

  • 一个简单的多层卷积网络,其中加入了一个 PixelShuffle 进行上采样操作
class CEDBlock(nn.Layer):
    """One refinement step that merges two feature maps.

    The ``down`` input is convolved, activated, convolved again and then
    upsampled with ``nn.PixelShuffle(upscale_factor=2)``; the ``up`` input is
    convolved and activated. The two results are concatenated along axis 1.

    Args:
        in_channels_up (int): Channel count of the ``up`` input.
        in_channels_down (int): Channel count of the ``down`` input.
        out_channels_sub (int): Channel count produced by the convolution
            that feeds the pixel shuffle.
        out_channels (int): Channel count produced by the first convolution
            on each branch.
    """

    def __init__(self,
                 in_channels_up: int,
                 in_channels_down: int,
                 out_channels_sub: int,
                 out_channels: int) -> None:
        super().__init__()
        # All three convolutions share the same 3x3 / stride 1 / padding 1
        # geometry.
        conv_kwargs = dict(kernel_size=3, stride=1, padding=1)

        self.conv_up = nn.Conv2D(
            in_channels=in_channels_up,
            out_channels=out_channels,
            **conv_kwargs)
        self.conv_down = nn.Conv2D(
            in_channels=in_channels_down,
            out_channels=out_channels,
            **conv_kwargs)
        self.conv_sub = nn.Conv2D(
            in_channels=out_channels,
            out_channels=out_channels_sub,
            **conv_kwargs)

        self.relu = nn.ReLU()
        self.upsample = nn.PixelShuffle(upscale_factor=2)

    def forward(self, up: paddle.Tensor, down: paddle.Tensor) -> paddle.Tensor:
        """Fuse ``up`` with the upsampled, refined ``down`` feature.

        Args:
            up (paddle.Tensor): Feature map for the ``conv_up`` branch.
            down (paddle.Tensor): Feature map for the ``conv_down`` branch;
                it is pixel-shuffled before the concatenation.

        Returns:
            paddle.Tensor: Concatenation of both branches along axis 1.
        """
        lateral = self.relu(self.conv_up(up))

        refined = self.relu(self.conv_down(down))
        upsampled = self.upsample(self.conv_sub(refined))

        return paddle.concat([lateral, upsampled], axis=1)

CED 模型主体

  • 在五个层级的特征之间堆叠四个 CED Block,从高层级特征至低层级特征反向逐层细化边缘特征
  • 最后使用一个卷积层作为模型的输出层计算最终的边缘结果
class CEDHead(nn.Layer):
    """Backward refinement path followed by a single output convolution.

    One ``CEDBlock`` is created for every pair of adjacent entries in
    ``fea_channels``, ordered from the last pair to the first. Each block is
    configured with ``out_channels = up_channels // 2`` and
    ``out_channels_sub = up_channels * 2``.

    Args:
        fea_channels (List[int]): Channel counts of the input feature maps,
            in the same order as the features passed to ``forward``.
        num_classes (int): Channel count of the output convolution.
            Default: 1.
    """

    def __init__(self,
                 fea_channels: List[int] = [64, 128, 256, 512, 512],
                 num_classes: int = 1) -> None:
        super().__init__()
        self.head_blocks = nn.LayerList()
        # Walk adjacent levels from the last one to the first: level k is the
        # "down" input of a block and level k - 1 is its "up" input.
        for level in range(len(fea_channels) - 1, 0, -1):
            down_channels = fea_channels[level]
            up_channels = fea_channels[level - 1]
            self.head_blocks.append(
                CEDBlock(
                    in_channels_up=up_channels,
                    in_channels_down=down_channels,
                    out_channels_sub=up_channels * 2,
                    out_channels=up_channels // 2))

        # The output layer is sized from the first entry of fea_channels.
        self.head_output = nn.Conv2D(
            in_channels=fea_channels[0],
            out_channels=num_classes,
            kernel_size=3,
            stride=1,
            padding=1)

    def forward(self, fea_inputs: List[paddle.Tensor]) -> List[paddle.Tensor]:
        """Refine the features from the last level back to the first.

        Args:
            fea_inputs (List[paddle.Tensor]): Feature maps ordered like
                ``fea_channels``.

        Returns:
            List[paddle.Tensor]: A one-element list holding the result of the
            output convolution.
        """
        last_to_first = fea_inputs[::-1]
        refined = last_to_first[0]
        # Block k consumes the running result together with the feature one
        # position further along the reversed list.
        for position, block_layer in enumerate(self.head_blocks, start=1):
            refined = block_layer(last_to_first[position], refined)

        return [self.head_output(refined)]

CED

class CED(nn.Layer):
    """Crisp Edge Detection model: a backbone plus a ``CEDHead``.

    Args:
        backbone (nn.Layer, optional): Feature extractor. It must expose a
            ``feat_channels`` sequence and its forward pass must return an
            indexable collection of feature maps. When None (the default), a
            fresh ``vgg16()`` is built for this instance.
        backbone_indices (List[int], optional): Positions of the backbone
            outputs (and of the matching ``feat_channels`` entries) that are
            fed to the head. When None (the default), ``[1, 3, 6, 9, 12]``
            is used.
        num_classes (int): Forwarded to ``CEDHead``. Default: 1.
    """

    def __init__(self,
                 backbone: Optional[nn.Layer] = None,
                 backbone_indices: Optional[List[int]] = None,
                 num_classes: int = 1) -> None:
        super().__init__()
        # Default arguments are evaluated once, when the function is defined.
        # A `vgg16()` default would therefore be built at import time and the
        # same backbone (and its weights) would be shared by every CED()
        # created without an explicit backbone. Build it per instance instead.
        if backbone is None:
            backbone = vgg16()
        if backbone_indices is None:
            backbone_indices = [1, 3, 6, 9, 12]

        self.backbone = backbone
        # Keep a private copy so later changes to the caller's list cannot
        # silently alter which features this model selects.
        self.backbone_indices = list(backbone_indices)
        backbone_channels = [backbone.feat_channels[i]
                             for i in self.backbone_indices]
        self.head = CEDHead(fea_channels=backbone_channels,
                            num_classes=num_classes)

    def forward(self, inputs: paddle.Tensor) -> List[paddle.Tensor]:
        """Run the backbone, select the configured features and apply the head.

        Args:
            inputs (paddle.Tensor): Input passed to the backbone.

        Returns:
            List[paddle.Tensor]: Whatever the head returns for the selected
            features.
        """
        feat_list = self.backbone(inputs)
        feat_list = [feat_list[i] for i in self.backbone_indices]
        outputs = self.head(feat_list)
        return outputs

模型测试

# Smoke test: build the default model and run one random 1x3x224x224 input.
model = CED()

out = model(paddle.randn((1, 3, 224, 224)))

# The model returns a list; print the shape of its first element.
# Output recorded in the original run: [1, 1, 224, 224]
print(out[0].shape)

总结

  • 介绍了一下 CED 模型的主要改进,实现了 CED 模型的代码

  • 不过由于官方没有提供预训练模型,训练的代码也暂时没有迁移完成(咕咕咕),所以目前只有模型的代码实现

Logo

学大模型,用大模型上飞桨星河社区!每天8点V100G算力免费领!免费领取ERNIE 4.0 100w Token >>>

更多推荐