CTR-GCN

Paper: Channel-wise Topology Refinement Graph Convolution for Skeleton-Based Action Recognition

1. Abstract

Graph convolutional networks (GCNs) have been widely applied to skeleton-based action recognition and have achieved remarkable results. In a GCN, the graph topology dominates feature aggregation and is therefore the key to extracting representative features. In this work, we propose a novel Channel-wise Topology Refinement Graph Convolution (CTR-GC) that dynamically learns different topologies and effectively aggregates joint features in different channels for skeleton-based action recognition. The proposed CTR-GC models channel-wise topologies by learning a shared topology as a generic prior for all channels and refining it with channel-specific correlations for each channel. This refinement introduces only a few extra parameters and significantly reduces the difficulty of modeling channel-wise topologies. Furthermore, by reformulating graph convolutions into a unified form, we find that CTR-GC relaxes the strict constraints of graph convolutions, which leads to stronger representation capability. Combining CTR-GC with temporal modeling modules, we develop a powerful graph convolutional network named CTR-GCN, which notably outperforms state-of-the-art methods on the NTU RGB+D, NTU RGB+D 120, and NW-UCLA datasets.

2. CTR-GC

(Figure: the CTR-GC module)

The tensor layout is [bs, channels, temporal dimension T, spatial dimension V], where channels corresponds to C in the figure and V corresponds to N in the figure.
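Before the code, here is a toy NumPy sketch of the idea (my own paraphrase of the CTR-GC forward pass shown below, not part of the original code): the shared topology A is refined per channel by a learned correlation matrix, and joint features are then aggregated along each channel's own topology.

import numpy as np

V, C = 3, 2                        # toy sizes: 3 joints, 2 channels
A = np.eye(V)                      # shared topology, a generic prior for all channels
Q = np.random.randn(C, V, V)       # learned channel-specific correlations (the conv4 output below)
alpha = 0.5
refined = A[None] + alpha * Q      # one refined V x V topology per channel

x = np.random.randn(C, V)          # per-channel joint features for a single frame
out = np.einsum('cuv,cv->cu', refined, x)   # aggregate features along each channel's topology
print(out.shape)                   # (2, 3)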


def einsum(x1, x3):
    """paddle.einsum only support in dynamic graph mode.
    x1 : n c u v
    x2 : n c t v
    """
    n, c, u, v1 = x1.shape
    n, c, t, v3 = x3.shape
    assert (v1 == v3), "Args of einsum not match!"
    x1 = paddle.transpose(x1, perm=[0, 1, 3, 2])  # n c v u
    y = paddle.matmul(x3, x1)
    # out: n c t u
    return y
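As a quick sanity check (my own toy example; it assumes a dynamic-graph Paddle environment, since that is the only mode where paddle.einsum is available), the matmul-based helper matches paddle.einsum('ncuv,nctv->nctu'):

import paddle

x1 = paddle.randn([2, 8, 25, 25])    # n c u v  (channel-wise topologies)
x3 = paddle.randn([2, 8, 300, 25])   # n c t v  (per-channel joint features)
ref = paddle.einsum('ncuv,nctv->nctu', x1, x3)
out = einsum(x1, x3)
print(paddle.allclose(ref, out))     # should print a boolean tensor containing True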

#################################
        self.conv1 = nn.Conv2D(self.in_channels,
                               self.rel_channels,
                               kernel_size=1)
        self.conv2 = nn.Conv2D(self.in_channels,
                               self.rel_channels,
                               kernel_size=1)
        self.conv3 = nn.Conv2D(self.in_channels,
                               self.out_channels,
                               kernel_size=1)
        self.conv4 = nn.Conv2D(self.rel_channels,
                               self.out_channels,
                               kernel_size=1)
        self.tanh = nn.Tanh()

    def forward(self, x, A=None, alpha=1):
        #x.shape = [bs,in_channels,T,V]
        x1, x2, x3 = self.conv1(x).mean(-2), self.conv2(x).mean(-2), self.conv3(
            x)#x1.shape = x2.shape =[bs,rel_channels,V] x3.shape ==[bs,out_channels,T,V]
        x1 = self.tanh(x1.unsqueeze(-1) - x2.unsqueeze(-2)) # x1.shape = [bs,rel_channels,V,V]
        x1 = self.conv4(x1) * alpha + (
            A.unsqueeze(0).unsqueeze(0) if A is not None else 0)  # bs,out_channels,V,V
        # paddle.einsum is only supported in dynamic graph mode; for a static-graph
        # (inference) model, use the matmul-based einsum helper defined above instead.
        # x1 = paddle.einsum('ncuv,nctv->nctu', x1, x3)
        x1 = einsum(x1, x3)
        return x1

For details of the temporal convolution layer, see the MultiScale_TemporalConv class below.

(Figure) Left: the basic structure of a TCN_GCN_unit; right: the CTR-GC module in detail.

3. Dataset

As before, we use the FSD-10 figure-skating dataset.

The data tensor has dimensions (N, C, T, V, M):

- N: number of samples.
- C = 3: the x, y coordinates and the confidence score of each joint; every x and y is scaled to [-1, 1].
- T = 1000: the temporal length of the action, 1000 frames; some actions are actually longer than 1000 frames, in which case 1000 frames are sampled from them (see the AutoPadding code below).
- V = 25: the 25 joints; see the skeleton example figure below for the meaning of each joint.
- M = 1: the number of athletes (one performer).
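For illustration only (a hypothetical all-zero sample, not real data), a single clip in this layout and the (x, y)-only slice that the model below actually consumes would look like:

import numpy as np

sample = np.zeros((3, 1000, 25, 1), dtype="float32")   # C, T, V, M
xy_only = sample[:2]                                   # keep only x and y -> (2, 1000, 25, 1)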

Skeleton example figure (note: the joint with index 8 is the body center).

4. CTR-GCN Code

# imports
import math    # used by CTRGCNHead.init_weights below
import random  # used by AutoPadding when random_pad=True
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
import numpy as np
from weight_init import weight_init_

4.1 Model Architecture (Backbone)



def zero(x):
    return 0


def iden(x):
    return x




def get_hop_distance(num_node, edge, max_hop=1):
    A = np.zeros((num_node, num_node))
    for i, j in edge:
        A[j, i] = 1
        A[i, j] = 1

    # compute hop steps
    hop_dis = np.zeros((num_node, num_node)) + np.inf
    transfer_mat = [np.linalg.matrix_power(A, d) for d in range(max_hop + 1)]
    arrive_mat = (np.stack(transfer_mat) > 0)
    for d in range(max_hop, -1, -1):
        hop_dis[arrive_mat[d]] = d
    return hop_dis
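A toy check of get_hop_distance (my own example) on a 3-node chain 0-1-2 with max_hop=2; entry (i, j) is the hop distance between joints i and j:

hop = get_hop_distance(3, [(0, 1), (1, 2)], max_hop=2)
print(hop)
# [[0. 1. 2.]
#  [1. 0. 1.]
#  [2. 1. 0.]]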


def normalize_digraph(A):
    Dl = np.sum(A, 0)
    num_node = A.shape[0]
    Dn = np.zeros((num_node, num_node))
    for i in range(num_node):
        if Dl[i] > 0:
            Dn[i, i] = Dl[i]**(-1)
    AD = np.dot(A, Dn)
    return AD
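normalize_digraph divides each column of A by its in-degree, i.e. AD[:, j] = A[:, j] / sum_i A[i, j]. A small hand-checked example of my own:

A = np.array([[0., 1., 1.],
              [1., 0., 0.],
              [1., 0., 0.]])
print(normalize_digraph(A))
# [[0.  1.  1. ]
#  [0.5 0.  0. ]
#  [0.5 0.  0. ]]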


class Graph():
    def __init__(self,
                 layout='fsd10',
                 strategy='spatial',
                 max_hop=1,
                 dilation=1):
        self.max_hop = max_hop
        self.dilation = dilation

        self.get_edge(layout)
        self.hop_dis = get_hop_distance(self.num_node,
                                        self.edge,
                                        max_hop=max_hop)
        self.get_adjacency(strategy)

    def __str__(self):
        return str(self.A)

    def get_edge(self, layout):
        # edge is a list of [child, parent] pairs

        if layout == 'fsd10':
            self.num_node = 25
            self_link = [(i, i) for i in range(self.num_node)]
            neighbor_link = [(1, 8), (0, 1), (15, 0), (17, 15), (16, 0),
                             (18, 16), (5, 1), (6, 5), (7, 6), (2, 1), (3, 2),
                             (4, 3), (9, 8), (10, 9), (11, 10), (24, 11),
                             (22, 11), (23, 22), (12, 8), (13, 12), (14, 13),
                             (21, 14), (19, 14), (20, 19)]
            self.edge = self_link + neighbor_link
            self.center = 8
        elif layout == 'ntu-rgb+d':
            self.num_node = 25
            self_link = [(i, i) for i in range(self.num_node)]
            neighbor_1base = [(1, 2), (2, 21), (3, 21), (4, 3), (5, 21), (6, 5),
                              (7, 6), (8, 7), (9, 21), (10, 9), (11, 10),
                              (12, 11), (13, 1), (14, 13), (15, 14), (16, 15),
                              (17, 1), (18, 17), (19, 18), (20, 19), (22, 23),
                              (23, 8), (24, 25), (25, 12)]
            neighbor_link = [(i - 1, j - 1) for (i, j) in neighbor_1base]
            self.edge = self_link + neighbor_link
            self.center = 21 - 1
        else:
            raise ValueError("Do Not Exist This Layout.")

    def get_adjacency(self, strategy):
        valid_hop = range(0, self.max_hop + 1, self.dilation)
        adjacency = np.zeros((self.num_node, self.num_node))
        for hop in valid_hop:
            adjacency[self.hop_dis == hop] = 1
        normalize_adjacency = normalize_digraph(adjacency)

        if strategy == 'spatial':
            A = []
            for hop in valid_hop:
                a_root = np.zeros((self.num_node, self.num_node))
                a_close = np.zeros((self.num_node, self.num_node))
                a_further = np.zeros((self.num_node, self.num_node))
                for i in range(self.num_node):
                    for j in range(self.num_node):
                        if self.hop_dis[j, i] == hop:
                            if self.hop_dis[j, self.center] == self.hop_dis[
                                    i, self.center]:
                                a_root[j, i] = normalize_adjacency[j, i]
                            elif self.hop_dis[j, self.center] > self.hop_dis[
                                    i, self.center]:
                                a_close[j, i] = normalize_adjacency[j, i]
                            else:
                                a_further[j, i] = normalize_adjacency[j, i]
                if hop == 0:
                    A.append(a_root)
                else:
                    A.append(a_root + a_close)
                    A.append(a_further)
            A = np.stack(A)
            self.A = A
        else:
            raise ValueError("Do Not Exist This Strategy")

def conv_init(conv):
    if conv.weight is not None:
        weight_init_(conv.weight, 'kaiming_normal_', mode='fan_in')
    if conv.bias is not None:
        nn.initializer.Constant(value=0.0)(conv.bias)


def bn_init(bn, scale):
    nn.initializer.Constant(value=float(scale))(bn.weight)
    nn.initializer.Constant(value=0.0)(bn.bias)


def einsum(x1, x3):
    """paddle.einsum only support in dynamic graph mode.
    x1 : n c u v
    x2 : n c t v
    """
    n, c, u, v1 = x1.shape
    n, c, t, v3 = x3.shape
    assert (v1 == v3), "Args of einsum not match!"
    x1 = paddle.transpose(x1, perm=[0, 1, 3, 2])  # n c v u
    y = paddle.matmul(x3, x1)
    # out: n c t u
    return y


class CTRGC(nn.Layer):

    def __init__(self,
                 in_channels,
                 out_channels,
                 rel_reduction=8,
                 mid_reduction=1):
        super(CTRGC, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        # if in_channels == 3 or in_channels == 9:
        #     self.rel_channels = 8
        #     self.mid_channels = 16
        # else:
        #     self.rel_channels = in_channels // rel_reduction
        #     self.mid_channels = in_channels // mid_reduction

        # the adaptive setting above (commented out) is disabled; the relation and
        # mid channel widths are fixed here
        self.rel_channels = 8
        self.mid_channels = 16

        # print(self.in_channels,self.rel_channels)
        self.conv1 = nn.Conv2D(self.in_channels,
                               self.rel_channels,
                               kernel_size=1)
        self.conv2 = nn.Conv2D(self.in_channels,
                               self.rel_channels,
                               kernel_size=1)
        self.conv3 = nn.Conv2D(self.in_channels,
                               self.out_channels,
                               kernel_size=1)
        self.conv4 = nn.Conv2D(self.rel_channels,
                               self.out_channels,
                               kernel_size=1)
        self.tanh = nn.Tanh()

    def init_weights(self):
        """Initiate the parameters.
        """
        for m in self.sublayers():
            if isinstance(m, nn.Conv2D):
                conv_init(m)
            elif isinstance(m, nn.BatchNorm2D):
                bn_init(m, 1)

    def forward(self, x, A=None, alpha=1):
        # x: [N*M, in_channels, T, V]
        x1, x2, x3 = self.conv1(x).mean(-2), self.conv2(x).mean(-2), self.conv3(
            x)
        x1 = self.tanh(x1.unsqueeze(-1) - x2.unsqueeze(-2))
        x1 = self.conv4(x1) * alpha + (
            A.unsqueeze(0).unsqueeze(0) if A is not None else 0)  # N,C,V,V
        # paddle.einsum is only supported in dynamic graph mode; the matmul-based
        # einsum helper above is used instead so the model can also be exported for inference.
        # x1 = paddle.einsum('ncuv,nctv->nctu', x1, x3)
        x1 = einsum(x1, x3)
        return x1
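A small shape trace of one CTR-GC block (my own sketch; the identity matrix stands in for one adjacency subset). The refined channel-wise topology has shape [N, C_out, V, V], and the aggregated output keeps the [N, C_out, T, V] layout:

ctrgc = CTRGC(in_channels=64, out_channels=64)
x = paddle.randn([2, 64, 30, 25])                    # N, C_in, T, V
A = paddle.to_tensor(np.eye(25, dtype="float32"))    # one 25 x 25 adjacency subset
print(ctrgc(x, A, alpha=1).shape)                    # [2, 64, 30, 25]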


class TemporalConv(nn.Layer):

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 dilation=1):
        super(TemporalConv, self).__init__()
        pad = (kernel_size + (kernel_size - 1) * (dilation - 1) - 1) // 2
        self.conv = nn.Conv2D(in_channels,
                              out_channels,
                              kernel_size=(kernel_size, 1),
                              padding=(pad, 0),
                              stride=(stride, 1),
                              dilation=(dilation, 1))

        self.bn = nn.BatchNorm2D(out_channels)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return x


class MultiScale_TemporalConv(nn.Layer):

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size=3,
                 stride=1,
                 dilations=[1, 2, 3, 4],
                 residual=True,
                 residual_kernel_size=1):

        super(MultiScale_TemporalConv, self).__init__()
        assert out_channels % (
            len(dilations) +
            2) == 0, '# out channels should be multiples of # branches'

        # Multiple branches of temporal convolution
        self.num_branches = len(dilations) + 2
        branch_channels = out_channels // self.num_branches
        if type(kernel_size) == list:
            assert len(kernel_size) == len(dilations)
        else:
            kernel_size = [kernel_size] * len(dilations)
        # Temporal Convolution branches
        self.branches = nn.LayerList([
            nn.Sequential(
                nn.Conv2D(in_channels,
                          branch_channels,
                          kernel_size=1,
                          padding=0),
                nn.BatchNorm2D(branch_channels),
                nn.ReLU(),
                TemporalConv(branch_channels,
                             branch_channels,
                             kernel_size=ks,
                             stride=stride,
                             dilation=dilation),
            ) for ks, dilation in zip(kernel_size, dilations)
        ])

        # Additional Max & 1x1 branch
        self.branches.append(
            nn.Sequential(
                nn.Conv2D(in_channels,
                          branch_channels,
                          kernel_size=1,
                          padding=0), nn.BatchNorm2D(branch_channels),
                nn.ReLU(),
                nn.MaxPool2D(kernel_size=(3, 1),
                             stride=(stride, 1),
                             padding=(1, 0)), nn.BatchNorm2D(branch_channels)))

        self.branches.append(
            nn.Sequential(
                nn.Conv2D(in_channels,
                          branch_channels,
                          kernel_size=1,
                          padding=0,
                          stride=(stride, 1)), nn.BatchNorm2D(branch_channels)))

        # Residual connection
        if not residual:
            self.residual = lambda x: 0
        elif (in_channels == out_channels) and (stride == 1):
            self.residual = lambda x: x
        else:
            self.residual = TemporalConv(in_channels,
                                         out_channels,
                                         kernel_size=residual_kernel_size,
                                         stride=stride)

    def init_weights(self):
        """Initiate the parameters.
        """
        # initialize
        for m in self.sublayers():
            if isinstance(m, nn.Conv2D):
                conv_init(m)
            elif isinstance(m, nn.BatchNorm2D):
                weight_init_(m.weight, 'Normal', std=0.02, mean=1.0)
                nn.initializer.Constant(value=0.0)(m.bias)

    def forward(self, x):
        # Input dim: (N,C,T,V)
        res = self.residual(x)
        branch_outs = []
        for tempconv in self.branches:
            out = tempconv(x)
            branch_outs.append(out)

        out = paddle.concat(branch_outs, axis=1)
        out += res
        return out
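A quick shape check of my own: with dilations=[1, 2] there are four branches (two dilated temporal convolutions, a max-pool branch, and a 1x1 branch), so out_channels must be divisible by 4, and stride=2 halves the temporal dimension:

mstcn = MultiScale_TemporalConv(64, 64, kernel_size=5, stride=2, dilations=[1, 2])
x = paddle.randn([2, 64, 300, 25])   # N, C, T, V
print(mstcn(x).shape)                # [2, 64, 150, 25]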


class unit_tcn(nn.Layer):

    def __init__(self, in_channels, out_channels, kernel_size=9, stride=1):
        super(unit_tcn, self).__init__()
        pad = int((kernel_size - 1) / 2)
        self.conv = nn.Conv2D(in_channels,
                              out_channels,
                              kernel_size=(kernel_size, 1),
                              padding=(pad, 0),
                              stride=(stride, 1))

        self.bn = nn.BatchNorm2D(out_channels)
        self.relu = nn.ReLU()
        conv_init(self.conv)
        bn_init(self.bn, 1)

    def forward(self, x):
        x = self.bn(self.conv(x))
        return x


class unit_gcn(nn.Layer):

    def __init__(self,
                 in_channels,
                 out_channels,
                 A,
                 coff_embedding=4,
                 adaptive=True,
                 residual=True):
        super(unit_gcn, self).__init__()
        inter_channels = out_channels // coff_embedding
        self.inter_c = inter_channels
        self.out_c = out_channels
        self.in_c = in_channels
        self.adaptive = adaptive
        self.num_subset = A.shape[0]
        self.convs = nn.LayerList()

        for i in range(self.num_subset):
            self.convs.append(CTRGC(in_channels, out_channels))

        if residual:
            if in_channels != out_channels:
                self.down = nn.Sequential(
                    nn.Conv2D(in_channels, out_channels, 1),
                    nn.BatchNorm2D(out_channels))
            else:
                self.down = lambda x: x
        else:
            self.down = lambda x: 0
        if self.adaptive:
            pa_param = paddle.ParamAttr(
                initializer=paddle.nn.initializer.Assign(A.astype(np.float32)))
            self.PA = paddle.create_parameter(shape=A.shape,
                                              dtype='float32',
                                              attr=pa_param)
        else:
            A_tensor = paddle.to_tensor(A, dtype="float32")
            self.A = paddle.create_parameter(
                shape=A_tensor.shape,
                dtype='float32',
                default_initializer=paddle.nn.initializer.Assign(A_tensor))
            self.A.stop_gradient = True
        alpha_tensor = paddle.to_tensor(np.zeros(1), dtype="float32")
        self.alpha = paddle.create_parameter(
            shape=alpha_tensor.shape,
            dtype='float32',
            default_initializer=paddle.nn.initializer.Assign(alpha_tensor))
        self.bn = nn.BatchNorm2D(out_channels)
        self.soft = nn.Softmax(-2)
        self.relu = nn.ReLU()

    def init_weights(self):
        for m in self.sublayers():
            if isinstance(m, nn.Conv2D):
                conv_init(m)
            elif isinstance(m, nn.BatchNorm2D):
                bn_init(m, 1)
        bn_init(self.bn, 1e-6)

    def forward(self, x):
        y = None
        if self.adaptive:
            A = self.PA
        else:
            A = self.A
        for i in range(self.num_subset):
            z = self.convs[i](x, A[i], self.alpha)
            y = z + y if y is not None else z
        y = self.bn(y)
        y += self.down(x)
        y = self.relu(y)
        return y
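Putting the spatial pieces together (a sketch of my own): A from Graph has three subsets, each subset is refined by its own CTR-GC, and the three results are summed before batch norm, the residual branch, and ReLU:

A = Graph(layout='fsd10', strategy='spatial').A   # (3, 25, 25)
gcn = unit_gcn(64, 128, A)
x = paddle.randn([2, 64, 100, 25])                # N, C, T, V
print(gcn(x).shape)                               # [2, 128, 100, 25]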


class TCN_GCN_unit(nn.Layer):

    def __init__(self,
                 in_channels,
                 out_channels,
                 A,
                 stride=1,
                 residual=True,
                 adaptive=True,
                 kernel_size=5,
                 dilations=[1, 2]):
        super(TCN_GCN_unit, self).__init__()
        self.gcn1 = unit_gcn(in_channels, out_channels, A, adaptive=adaptive)
        self.tcn1 = MultiScale_TemporalConv(out_channels,
                                            out_channels,
                                            kernel_size=kernel_size,
                                            stride=stride,
                                            dilations=dilations,
                                            residual=False)
        self.relu = nn.ReLU()
        if not residual:
            self.residual = lambda x: 0

        elif (in_channels == out_channels) and (stride == 1):
            self.residual = lambda x: x

        else:
            self.residual = unit_tcn(in_channels,
                                     out_channels,
                                     kernel_size=1,
                                     stride=stride)

    def forward(self, x):
        y = self.relu(self.tcn1(self.gcn1(x)) + self.residual(x))
        return y



class CTRGCN(nn.Layer):
    """
    CTR-GCN model from:
    `"Channel-wise Topology Refinement Graph Convolution for Skeleton-Based Action Recognition" <https://arxiv.org/abs/2107.12213>`_
    Args:
        num_point: int, numbers of sketeton point.
        num_person: int, numbers of person.
        base_channel: int, model's hidden dim.
        graph: str, sketeton adjacency matrix name.
        graph_args: dict, sketeton adjacency graph class args.
        in_channels: int, channels of vertex coordinate. 2 for (x,y), 3 for (x,y,z). Default 3.
        adaptive: bool, if adjacency matrix can adaptive.
    """

    def __init__(self,
                 num_point=25,
                 num_person=1,
                 base_channel=64,
                 graph='ntu_rgb_d',
                 in_channels=2,
                 adaptive=True):
        super(CTRGCN, self).__init__()


        # the FSD-10 skeleton layout is hard-coded here; the `graph` argument is not used
        self.graph = Graph(layout='fsd10', strategy='spatial')
        A = self.graph.A  # (3, 25, 25)

        self.num_point = num_point
        self.data_bn = nn.BatchNorm1D(num_person * in_channels * num_point)
        self.base_channel = base_channel

        self.l1 = TCN_GCN_unit(in_channels,
                               self.base_channel,
                               A,
                               residual=False,
                               adaptive=adaptive)
        self.l2 = TCN_GCN_unit(self.base_channel,
                               self.base_channel,
                               A,
                               adaptive=adaptive)
        self.l3 = TCN_GCN_unit(self.base_channel,
                               self.base_channel,
                               A,
                               adaptive=adaptive)
        self.l4 = TCN_GCN_unit(self.base_channel,
                               self.base_channel,
                               A,
                               adaptive=adaptive)
        self.l5 = TCN_GCN_unit(self.base_channel,
                               self.base_channel * 2,
                               A,
                               stride=2,
                               adaptive=adaptive)
        self.l6 = TCN_GCN_unit(self.base_channel * 2,
                               self.base_channel * 2,
                               A,
                               adaptive=adaptive)
        self.l7 = TCN_GCN_unit(self.base_channel * 2,
                               self.base_channel * 2,
                               A,
                               adaptive=adaptive)
        self.l8 = TCN_GCN_unit(self.base_channel * 2,
                               self.base_channel * 4,
                               A,
                               stride=2,
                               adaptive=adaptive)
        self.l9 = TCN_GCN_unit(self.base_channel * 4,
                               self.base_channel * 4,
                               A,
                               adaptive=adaptive)
        self.l10 = TCN_GCN_unit(self.base_channel * 4,
                                self.base_channel * 4,
                                A,
                                adaptive=adaptive)

    def init_weights(self):
        bn_init(self.data_bn, 1)

    def forward(self, x):
        N, C, T, V, M = x.shape
        x = paddle.transpose(x, perm=[0, 4, 3, 1, 2])
        x = paddle.reshape(x, (N, M * V * C, T))

        x = self.data_bn(x)

        x = paddle.reshape(x, (N, M, V, C, T))
        x = paddle.transpose(x, perm=(0, 1, 3, 4, 2))

        x = paddle.reshape(x, (N * M, C, T, V))

        x = self.l1(x)
        x = self.l2(x)
        x = self.l3(x)
        x = self.l4(x)
        x = self.l5(x)
        x = self.l6(x)
        x = self.l7(x)
        x = self.l8(x)
        x = self.l9(x)
        x = self.l10(x)

        return x, N, M
x = paddle.randn([16,2,350,25,1])
len(CTRGCN()(x))
3

4.2 Model Architecture (Head)

class CTRGCNHead(nn.Layer):
    """
    Head for CTR-GCN model.
    Args:
        in_channels: int, input feature channels. Default: 64.
        num_classes: int, output the number of classes.
        drop_out: float, dropout ratio of layer. Default: 0.
    """

    def __init__(self, in_channels=64, num_classes=30, drop_out=0, **kwargs):
        super().__init__()
        self.in_channels = in_channels
        self.drop_out = drop_out
        self.num_classes = num_classes

        self.fc = nn.Linear(self.in_channels * 4, self.num_classes)
        if drop_out:
            self.drop_out = nn.Dropout(self.drop_out)
        else:
            self.drop_out = lambda x: x
        self.init_weights()

    def init_weights(self):
        """Initiate the parameters.
        """
        for layer in self.sublayers():
            if isinstance(layer, nn.Conv2D):
                weight_init_(layer.weight,
                             'Normal',
                             mean=0.0,
                             std=math.sqrt(2. / self.num_classes))

    def forward(self, output_patch):
        """Define how the head is going to run.
        """
        x, N, M = output_patch
        # N*M,C,T,V
        _, c_new, T, V = x.shape
        x = paddle.reshape(x, shape=[N, M, c_new, T * V])
        x = x.mean(3).mean(1)
        x = self.drop_out(x)

        return self.fc(x)

4.3 Model Architecture (Framework)

class CTRGCN_framework(nn.Layer):
    def __init__(self,num_classes = 30):
        super().__init__()
        self.backbone = CTRGCN()
        self.head = CTRGCNHead(num_classes = num_classes)
    def forward(self,data):
        feature = self.backbone(data)
        cls_score = self.head(feature)
        return cls_score

x = paddle.randn([16,2,350,25,1])
CTRGCN_framework()(x).shape
[16, 30]

4.4 Dataset Preprocessing

import numpy as np
class AutoPadding(object):
    """
    Sample or Padding frame skeleton feature.
    Args:
        window_size: int, temporal size of skeleton feature.
        random_pad: bool, whether do random padding when frame length < window size. Default: False.
    """

    def __init__(self, window_size, random_pad=False):
        self.window_size = window_size
        self.random_pad = random_pad

    def get_frame_num(self, data):
        C, T, V, M = data.shape
        for i in range(T - 1, -1, -1):
            tmp = np.sum(data[:, i, :, :])
            if tmp > 0:
                T = i + 1
                break
        return T

    def __call__(self, results):
        # data = results['data']
        data = results

        C, T, V, M = data.shape
        T = self.get_frame_num(data)
        if T == self.window_size:
            data_pad = data[:, :self.window_size, :, :]
        elif T < self.window_size:
            begin = random.randint(0, self.window_size -
                                   T) if self.random_pad else 0
            data_pad = np.zeros((C, self.window_size, V, M))
            data_pad[:, begin:begin + T, :, :] = data[:, :T, :, :]
        else:
            if self.random_pad:
                index = np.random.choice(T, self.window_size,
                                         replace=False).astype('int64')
            else:
                index = np.linspace(0, T-1, self.window_size).astype("int64")
            data_pad = data[:, index, :, :]

        # results['data'] = data_pad
        # return results
        return data_pad
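A toy check of AutoPadding (my own example; the shapes are C, T, V, M): a 6-frame clip is uniformly subsampled down to the window size, while a 2-frame clip is zero-padded up to it:

pad = AutoPadding(window_size=4)
long_clip = np.random.rand(2, 6, 3, 1).astype("float32")
short_clip = np.ones((2, 2, 3, 1), dtype="float32")
print(pad(long_clip).shape, pad(short_clip).shape)   # (2, 4, 3, 1) (2, 4, 3, 1)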
# use one out of every 7 samples as the validation set
train_index = []
valid_index= []
for i in range(2922):
    if i%7 !=1:
        train_index.append(i)
    else:
        valid_index.append(i)
train_index =np.array(train_index).astype("int64")
valid_index = np.array(valid_index).astype("int64")
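A quick count of the resulting 1-in-7 split over the 2922 samples, which should give 2504 training and 418 validation indices:

print(len(train_index), len(valid_index))   # 2504 418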
import paddle
import numpy as np
import paddle.nn.functional as F
from visualdl import LogWriter
from tqdm import tqdm
log_writer = LogWriter("./log/gnet")
class Dataset(paddle.io.Dataset):
    def __init__(self,is_train = True):
        data = np.load("/home/aistudio/data/data104925/train_data.npy").astype("float32") #[2922, 3, 2500, 25, 1]
        label = np.load("/home/aistudio/data/data104925/train_label.npy")

        split_index = 2400
        self.autopad = AutoPadding(window_size= 1000)
        self.train_data = data[train_index,:,:,:,:]
        self.valid_data = data[valid_index,:,:,:,:]
        self.train_label = label[train_index]
        self.valid_label = label[valid_index]
        self.is_train = is_train
        if self.is_train == True:
            self.size = len(self.train_data)
        else:
            self.size = len(self.valid_data)
        

    def __getitem__(self, index):
        if self.is_train == True:
            one_row = self.train_data[index]
            one_label = self.train_label[index]
        else:
            one_row = self.valid_data[index]
            one_label = self.valid_label[index]
        one_row = one_row[:2, :, :, :]
        one_row = self.autopad(one_row).astype("float32")
        return one_row,one_label

    def __len__(self):
        return self.size
BATCH_SIZE =16

train_dataset = Dataset()
data_loader = paddle.io.DataLoader(train_dataset,batch_size=BATCH_SIZE,shuffle =True,drop_last=True)

for data in data_loader:
    print(data[0].shape,data[1].shape)
    break
[16, 2, 1000, 25, 1] [16]
def valid_accurary(valid_loader, classifer_net):
    # evaluate the classification accuracy on the validation loader (gradients disabled)
    with paddle.set_grad_enabled(False):
        acc_all = 0
        num = 0
        for one in valid_loader:
            img_data, cls = one
            out = classifer_net(img_data)
            acc = paddle.metric.accuracy(out, cls.unsqueeze(1))
            acc_all += acc.numpy()[0]
            num += 1
        return acc_all / num
valid_dataset = Dataset(is_train=False)
valid_loader = paddle.io.DataLoader(valid_dataset,batch_size=16,shuffle =True,drop_last=True)

4.5 Validation Accuracy (my own joint-stream checkpoint)

My own training run reaches a validation accuracy of about 0.60.


valid_dataset = Dataset(is_train=False)
valid_loader = paddle.io.DataLoader(valid_dataset,batch_size=16,shuffle =True,drop_last=True)

ctrgcn = CTRGCN_framework()
# agcn.set_state_dict(paddle.load("AGCN_fsd.pdparams"))
ctrgcn.set_state_dict(paddle.load("Gmodel_state0.60.pdparams"))
# print(agcn.backbone.l4.gcn1.alpha.numpy())
# print(agcn.backbone.l5.gcn1.alpha.numpy())

print("自己准确率",valid_accurary(valid_loader,ctrgcn))
自己准确率 0.6057692307692307

4.6 Training

import paddle.nn as nn
from learning_rates import CustomWarmupAdjustDecay
ctrgcn = CTRGCN_framework()
# scheduler_G = paddle.optimizer.lr.CosineAnnealingDecay(learning_rate =0.05, T_max =60, eta_min=0, last_epoch=- 1, verbose=True)
scheduler_G = CustomWarmupAdjustDecay(step_base_lr =0.1, warmup_epochs =5, lr_decay_rate=0.1, boundaries = [35, 55],num_iters = 0.1,verbose=True)

optimizer = paddle.optimizer.Momentum(learning_rate=scheduler_G, momentum=0.9, parameters=ctrgcn.parameters(), use_nesterov=False, weight_decay=1e-4, grad_clip=None, name=None)
Epoch 0: CustomWarmupAdjustDecay set learning rate to 0.02.
import os
epoches =100
i = 0
smooth_label= True
v_acc_max = 0.
crossEntropyLoss =nn.CrossEntropyLoss(soft_label = smooth_label)
for epoch in range(epoches):
    print("epoch",epoch)
    for data in tqdm(data_loader):
        
        one_data,cls=data
        out = ctrgcn(one_data)
        optimizer.clear_grad()
        if smooth_label ==True:
            labels = F.one_hot(cls, 30)
            # print(labels.shape)
            labels = F.label_smooth(labels, epsilon=0.1)
            # print(labels.shape)
            # labels = paddle.squeeze(labels, axis=1)
            loss = crossEntropyLoss(out,labels)
        else:
            loss = crossEntropyLoss(out,cls)

        loss.backward()
        optimizer.step()


        log_writer.add_scalar(tag='train/loss', step=i, value=loss.numpy()[0])


        if i%100 == 3:
            print("loss",loss.numpy()[0],v_acc_max)
            
        i+=1
        # break

    if epoch%2 == 0:
        ctrgcn.eval()
        v_acc = valid_accurary(valid_loader,ctrgcn)
        ctrgcn.train()
        print("epoch loss",loss.numpy()[0],v_acc)
        log_writer.add_scalar(tag='train/v_acc', step=i, value=v_acc)
        if v_acc > v_acc_max:
            v_acc_max = v_acc
            save_param_path_model = os.path.join("model", 'Gmodel_state'+str(v_acc_max)+'.pdparams')
            paddle.save(ctrgcn.state_dict(), save_param_path_model)

    scheduler_G.step()
    # break

5. Summary

I quite like the way the TCN splits the channels into several branches with different dilations, and the dynamic channel-wise topology was new to me.

This article is a repost.
Original project link
