PPSIG: CTR-GCN Action Recognition
The dataset is the FSD-10 figure-skating dataset and the task is CTR-GCN action recognition. The project can be run directly and reaches an accuracy of 0.60.
CTR-GCN
Paper: Channel-wise Topology Refinement Graph Convolution for Skeleton-Based
Action Recognition
1. Abstract
Graph convolutional networks (GCNs) have been widely applied to skeleton-based action recognition and have achieved remarkable results. In GCNs, the graph topology dominates feature aggregation and is therefore the key to extracting representative features. In this work, we propose a novel Channel-wise Topology Refinement Graph Convolution (CTR-GC) that dynamically learns different topologies and effectively aggregates joint features in different channels for skeleton-based action recognition. The proposed CTR-GC models channel-wise topologies by learning a shared topology as a generic prior for all channels and refining it with channel-specific correlations. Our method introduces only a few extra parameters and significantly reduces the difficulty of modeling channel-wise topologies. Furthermore, by reformulating graph convolution into a unified form, we find that CTR-GC relaxes strict constraints of graph convolutions, leading to stronger representation capability. Combining CTR-GC with a temporal modeling module, we develop a powerful graph convolutional network named CTR-GCN, which notably outperforms state-of-the-art methods on the NTU RGB+D, NTU RGB+D 120, and NW-UCLA datasets.
2. CTR-GC
The input shape is [bs, channels, temporal dimension T, spatial dimension V], where channels corresponds to C in the figure and V corresponds to N in the figure.
def einsum(x1, x3):
"""paddle.einsum only support in dynamic graph mode.
x1 : n c u v
x2 : n c t v
"""
n, c, u, v1 = x1.shape
n, c, t, v3 = x3.shape
assert (v1 == v3), "Args of einsum not match!"
x1 = paddle.transpose(x1, perm=[0, 1, 3, 2]) # n c v u
y = paddle.matmul(x3, x1)
# out: n c t u
return y
#################################
self.conv1 = nn.Conv2D(self.in_channels,
self.rel_channels,
kernel_size=1)
self.conv2 = nn.Conv2D(self.in_channels,
self.rel_channels,
kernel_size=1)
self.conv3 = nn.Conv2D(self.in_channels,
self.out_channels,
kernel_size=1)
self.conv4 = nn.Conv2D(self.rel_channels,
self.out_channels,
kernel_size=1)
self.tanh = nn.Tanh()
def forward(self, x, A=None, alpha=1):
#x.shape = [bs,in_channels,T,V]
x1, x2, x3 = self.conv1(x).mean(-2), self.conv2(x).mean(-2), self.conv3(
x)  # x1.shape = x2.shape = [bs, rel_channels, V]; x3.shape = [bs, out_channels, T, V]
x1 = self.tanh(x1.unsqueeze(-1) - x2.unsqueeze(-2)) # x1.shape = [bs,rel_channels,V,V]
x1 = self.conv4(x1) * alpha + (
A.unsqueeze(0).unsqueeze(0) if A is not None else 0) # bs,out_channels,V,V
# paddle.einsum() is only supported in dynamic graph mode; for static-graph/inference export, use the manual einsum() helper defined above.
# x1 = paddle.einsum('ncuv,nctv->nctu', x1, x3)
x1 = einsum(x1, x3)
return x1
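As a quick sanity check (my own addition, not part of the original project), the matmul-based einsum helper can be compared against paddle.einsum with the same 'ncuv,nctv->nctu' equation in dynamic graph mode:
import paddle
n, c, t, v = 2, 8, 16, 25
x1 = paddle.randn([n, c, v, v])   # channel-wise topology, n c u v (here u == v)
x3 = paddle.randn([n, c, t, v])   # joint features, n c t v
ref = paddle.einsum('ncuv,nctv->nctu', x1, x3)
out = einsum(x1, x3)
print(out.shape)                  # [2, 8, 16, 25]
print(paddle.allclose(ref, out))  # True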
For details of the temporal convolution layer, see the MultiScale_TemporalConv class.
The left figure shows the basic architecture of a TCN_GCN_unit, and the right figure shows the CTR-GC block in detail.
3. Dataset
We again use the FSD-10 figure-skating dataset.
Symbol | Size | Meaning | Notes |
---|---|---|---|
N | number of samples | N samples | none |
C | 3 | x, y coordinates and confidence score of each joint | each x and y is rescaled to the range [-1, 1] |
T | 1000 | duration of the action, 1000 frames | actions longer than 1000 frames are subsampled down to 1000; see the AutoPadding code |
V | 25 | 25 joints | see the skeleton diagram below for the meaning of each joint |
M | 1 | number of athletes (1) | none |
Skeleton diagram (note that the keypoint with index 8 is the body center):
4. CTR-GCN Code
# imports
import math  # used by CTRGCNHead.init_weights in Section 4.2
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
import numpy as np
from weight_init import weight_init_
4.1 Model Architecture (Backbone)
def zero(x):
return 0
def iden(x):
return x
def get_hop_distance(num_node, edge, max_hop=1):
A = np.zeros((num_node, num_node))
for i, j in edge:
A[j, i] = 1
A[i, j] = 1
# compute hop steps
hop_dis = np.zeros((num_node, num_node)) + np.inf
transfer_mat = [np.linalg.matrix_power(A, d) for d in range(max_hop + 1)]
arrive_mat = (np.stack(transfer_mat) > 0)
for d in range(max_hop, -1, -1):
hop_dis[arrive_mat[d]] = d
return hop_dis
def normalize_digraph(A):
Dl = np.sum(A, 0)
num_node = A.shape[0]
Dn = np.zeros((num_node, num_node))
for i in range(num_node):
if Dl[i] > 0:
Dn[i, i] = Dl[i]**(-1)
AD = np.dot(A, Dn)
return AD
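# Toy illustration (my own example, not from the project): normalize_digraph computes
# A @ D^{-1}, dividing each column of A by the corresponding node's degree, so every
# column of the normalized matrix sums to 1.
A_toy = np.eye(3)
for i, j in [(0, 1), (1, 2)]:      # a 3-node chain with self-loops
    A_toy[i, j] = A_toy[j, i] = 1
print(normalize_digraph(A_toy).sum(axis=0))  # [1. 1. 1.]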
class Graph():
def __init__(self,
layout='openpose',
strategy='uniform',
max_hop=1,
dilation=1):
self.max_hop = max_hop
self.dilation = dilation
self.get_edge(layout)
self.hop_dis = get_hop_distance(self.num_node,
self.edge,
max_hop=max_hop)
self.get_adjacency(strategy)
def __str__(self):
return str(self.A)
def get_edge(self, layout):
# edge is a list of [child, parent] pairs
if layout == 'fsd10':
self.num_node = 25
self_link = [(i, i) for i in range(self.num_node)]
neighbor_link = [(1, 8), (0, 1), (15, 0), (17, 15), (16, 0),
(18, 16), (5, 1), (6, 5), (7, 6), (2, 1), (3, 2),
(4, 3), (9, 8), (10, 9), (11, 10), (24, 11),
(22, 11), (23, 22), (12, 8), (13, 12), (14, 13),
(21, 14), (19, 14), (20, 19)]
self.edge = self_link + neighbor_link
self.center = 8
elif layout == 'ntu-rgb+d':
self.num_node = 25
self_link = [(i, i) for i in range(self.num_node)]
neighbor_1base = [(1, 2), (2, 21), (3, 21), (4, 3), (5, 21), (6, 5),
(7, 6), (8, 7), (9, 21), (10, 9), (11, 10),
(12, 11), (13, 1), (14, 13), (15, 14), (16, 15),
(17, 1), (18, 17), (19, 18), (20, 19), (22, 23),
(23, 8), (24, 25), (25, 12)]
neighbor_link = [(i - 1, j - 1) for (i, j) in neighbor_1base]
self.edge = self_link + neighbor_link
self.center = 21 - 1
else:
raise ValueError("Do Not Exist This Layout.")
def get_adjacency(self, strategy):
valid_hop = range(0, self.max_hop + 1, self.dilation)
adjacency = np.zeros((self.num_node, self.num_node))
for hop in valid_hop:
adjacency[self.hop_dis == hop] = 1
normalize_adjacency = normalize_digraph(adjacency)
if strategy == 'spatial':
A = []
for hop in valid_hop:
a_root = np.zeros((self.num_node, self.num_node))
a_close = np.zeros((self.num_node, self.num_node))
a_further = np.zeros((self.num_node, self.num_node))
for i in range(self.num_node):
for j in range(self.num_node):
if self.hop_dis[j, i] == hop:
if self.hop_dis[j, self.center] == self.hop_dis[
i, self.center]:
a_root[j, i] = normalize_adjacency[j, i]
elif self.hop_dis[j, self.center] > self.hop_dis[
i, self.center]:
a_close[j, i] = normalize_adjacency[j, i]
else:
a_further[j, i] = normalize_adjacency[j, i]
if hop == 0:
A.append(a_root)
else:
A.append(a_root + a_close)
A.append(a_further)
A = np.stack(A)
self.A = A
else:
raise ValueError("Do Not Exist This Strategy")
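# Quick check (my addition): with the 'spatial' strategy and max_hop=1 the adjacency
# tensor is split into 3 subsets (root / centripetal / centrifugal), hence shape (3, 25, 25).
graph = Graph(layout='fsd10', strategy='spatial')
print(graph.A.shape)  # (3, 25, 25)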
def conv_init(conv):
if conv.weight is not None:
weight_init_(conv.weight, 'kaiming_normal_', mode='fan_in')
if conv.bias is not None:
nn.initializer.Constant(value=0.0)(conv.bias)
def bn_init(bn, scale):
nn.initializer.Constant(value=float(scale))(bn.weight)
nn.initializer.Constant(value=0.0)(bn.bias)
def einsum(x1, x3):
"""paddle.einsum only support in dynamic graph mode.
x1 : n c u v
x2 : n c t v
"""
n, c, u, v1 = x1.shape
n, c, t, v3 = x3.shape
assert (v1 == v3), "Args of einsum not match!"
x1 = paddle.transpose(x1, perm=[0, 1, 3, 2]) # n c v u
y = paddle.matmul(x3, x1)
# out: n c t u
return y
class CTRGC(nn.Layer):
def __init__(self,
in_channels,
out_channels,
rel_reduction=8,
mid_reduction=1):
super(CTRGC, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
# if in_channels == 3 or in_channels == 9:
# self.rel_channels = 8
# self.mid_channels = 16
# else:
# self.rel_channels = in_channels // rel_reduction
# self.mid_channels = in_channels // mid_reduction
self.rel_channels = 8
self.mid_channels = 16
# print(self.in_channels,self.rel_channels)
self.conv1 = nn.Conv2D(self.in_channels,
self.rel_channels,
kernel_size=1)
self.conv2 = nn.Conv2D(self.in_channels,
self.rel_channels,
kernel_size=1)
self.conv3 = nn.Conv2D(self.in_channels,
self.out_channels,
kernel_size=1)
self.conv4 = nn.Conv2D(self.rel_channels,
self.out_channels,
kernel_size=1)
self.tanh = nn.Tanh()
def init_weights(self):
"""Initiate the parameters.
"""
for m in self.sublayers():
if isinstance(m, nn.Conv2D):
conv_init(m)
elif isinstance(m, nn.BatchNorm2D):
bn_init(m, 1)
def forward(self, x, A=None, alpha=1):
# x.shape = [N, C, T, V], where N here is batch_size * num_person
x1, x2, x3 = self.conv1(x).mean(-2), self.conv2(x).mean(-2), self.conv3(
x)
x1 = self.tanh(x1.unsqueeze(-1) - x2.unsqueeze(-2))
x1 = self.conv4(x1) * alpha + (
A.unsqueeze(0).unsqueeze(0) if A is not None else 0) # N,C,V,V
# paddle.einsum() is only supported in dynamic graph mode; for static-graph/inference export, use the manual einsum() helper defined above.
# x1 = paddle.einsum('ncuv,nctv->nctu', x1, x3)
x1 = einsum(x1, x3)
return x1
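# Shape check for a single CTR-GC block (my own sketch, shapes as annotated in Section 2):
# input [N, C_in, T, V] plus one V x V adjacency subset -> output [N, C_out, T, V].
ctrgc = CTRGC(in_channels=64, out_channels=64)
x_demo = paddle.randn([2, 64, 32, 25])        # N, C_in, T, V
A_demo = paddle.randn([25, 25])               # one adjacency subset
print(ctrgc(x_demo, A_demo, alpha=1).shape)   # [2, 64, 32, 25]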
class TemporalConv(nn.Layer):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
dilation=1):
super(TemporalConv, self).__init__()
pad = (kernel_size + (kernel_size - 1) * (dilation - 1) - 1) // 2
self.conv = nn.Conv2D(in_channels,
out_channels,
kernel_size=(kernel_size, 1),
padding=(pad, 0),
stride=(stride, 1),
dilation=(dilation, 1))
self.bn = nn.BatchNorm2D(out_channels)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
return x
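# The padding above is chosen so that, with stride=1, the temporal length T is preserved
# for any kernel size and dilation. A quick check (my addition):
tconv = TemporalConv(64, 64, kernel_size=5, dilation=2)
print(tconv(paddle.randn([2, 64, 32, 25])).shape)  # [2, 64, 32, 25]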
class MultiScale_TemporalConv(nn.Layer):
def __init__(self,
in_channels,
out_channels,
kernel_size=3,
stride=1,
dilations=[1, 2, 3, 4],
residual=True,
residual_kernel_size=1):
super(MultiScale_TemporalConv, self).__init__()
assert out_channels % (
len(dilations) +
2) == 0, '# out channels should be multiples of # branches'
# Multiple branches of temporal convolution
self.num_branches = len(dilations) + 2
branch_channels = out_channels // self.num_branches
if type(kernel_size) == list:
assert len(kernel_size) == len(dilations)
else:
kernel_size = [kernel_size] * len(dilations)
# Temporal Convolution branches
self.branches = nn.LayerList([
nn.Sequential(
nn.Conv2D(in_channels,
branch_channels,
kernel_size=1,
padding=0),
nn.BatchNorm2D(branch_channels),
nn.ReLU(),
TemporalConv(branch_channels,
branch_channels,
kernel_size=ks,
stride=stride,
dilation=dilation),
) for ks, dilation in zip(kernel_size, dilations)
])
# Additional Max & 1x1 branch
self.branches.append(
nn.Sequential(
nn.Conv2D(in_channels,
branch_channels,
kernel_size=1,
padding=0), nn.BatchNorm2D(branch_channels),
nn.ReLU(),
nn.MaxPool2D(kernel_size=(3, 1),
stride=(stride, 1),
padding=(1, 0)), nn.BatchNorm2D(branch_channels)))
self.branches.append(
nn.Sequential(
nn.Conv2D(in_channels,
branch_channels,
kernel_size=1,
padding=0,
stride=(stride, 1)), nn.BatchNorm2D(branch_channels)))
# Residual connection
if not residual:
self.residual = lambda x: 0
elif (in_channels == out_channels) and (stride == 1):
self.residual = lambda x: x
else:
self.residual = TemporalConv(in_channels,
out_channels,
kernel_size=residual_kernel_size,
stride=stride)
def init_weights(self):
"""Initiate the parameters.
"""
# initialize
for m in self.sublayers():
if isinstance(m, nn.Conv2D):
conv_init(m)
elif isinstance(m, nn.BatchNorm2D):
weight_init_(m.weight, 'Normal', std=0.02, mean=1.0)
nn.initializer.Constant(value=0.0)(m.bias)
def forward(self, x):
# Input dim: (N,C,T,V)
res = self.residual(x)
branch_outs = []
for tempconv in self.branches:
out = tempconv(x)
branch_outs.append(out)
out = paddle.concat(branch_outs, axis=1)
out += res
return out
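# Because of the two extra branches (max-pool and 1x1), out_channels must be divisible by
# len(dilations) + 2. With the settings TCN_GCN_unit uses below (kernel_size=5,
# dilations=[1, 2], i.e. 4 branches), a shape check looks like this (my own sketch):
mstcn = MultiScale_TemporalConv(64, 64, kernel_size=5, dilations=[1, 2])
print(mstcn(paddle.randn([2, 64, 32, 25])).shape)  # [2, 64, 32, 25]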
class unit_tcn(nn.Layer):
def __init__(self, in_channels, out_channels, kernel_size=9, stride=1):
super(unit_tcn, self).__init__()
pad = int((kernel_size - 1) / 2)
self.conv = nn.Conv2D(in_channels,
out_channels,
kernel_size=(kernel_size, 1),
padding=(pad, 0),
stride=(stride, 1))
self.bn = nn.BatchNorm2D(out_channels)
self.relu = nn.ReLU()
conv_init(self.conv)
bn_init(self.bn, 1)
def forward(self, x):
x = self.bn(self.conv(x))
return x
class unit_gcn(nn.Layer):
def __init__(self,
in_channels,
out_channels,
A,
coff_embedding=4,
adaptive=True,
residual=True):
super(unit_gcn, self).__init__()
inter_channels = out_channels // coff_embedding
self.inter_c = inter_channels
self.out_c = out_channels
self.in_c = in_channels
self.adaptive = adaptive
self.num_subset = A.shape[0]
self.convs = nn.LayerList()
for i in range(self.num_subset):
self.convs.append(CTRGC(in_channels, out_channels))
if residual:
if in_channels != out_channels:
self.down = nn.Sequential(
nn.Conv2D(in_channels, out_channels, 1),
nn.BatchNorm2D(out_channels))
else:
self.down = lambda x: x
else:
self.down = lambda x: 0
if self.adaptive:
pa_param = paddle.ParamAttr(
initializer=paddle.nn.initializer.Assign(A.astype(np.float32)))
self.PA = paddle.create_parameter(shape=A.shape,
dtype='float32',
attr=pa_param)
else:
A_tensor = paddle.to_tensor(A, dtype="float32")
self.A = paddle.create_parameter(
shape=A_tensor.shape,
dtype='float32',
default_initializer=paddle.nn.initializer.Assign(A_tensor))
self.A.stop_gradient = True
alpha_tensor = paddle.to_tensor(np.zeros(1), dtype="float32")
self.alpha = paddle.create_parameter(
shape=alpha_tensor.shape,
dtype='float32',
default_initializer=paddle.nn.initializer.Assign(alpha_tensor))
self.bn = nn.BatchNorm2D(out_channels)
self.soft = nn.Softmax(-2)
self.relu = nn.ReLU()
def init_weights(self):
for m in self.sublayers():
if isinstance(m, nn.Conv2D):
conv_init(m)
elif isinstance(m, nn.BatchNorm2D):
bn_init(m, 1)
bn_init(self.bn, 1e-6)
def forward(self, x):
y = None
if self.adaptive:
A = self.PA
else:
A = self.A  # fixed (non-adaptive) adjacency
for i in range(self.num_subset):
z = self.convs[i](x, A[i], self.alpha)
y = z + y if y is not None else z
y = self.bn(y)
y += self.down(x)
y = self.relu(y)
return y
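# unit_gcn runs one CTR-GC per adjacency subset (3 under the spatial strategy) and sums
# the refined features. A quick shape check (my own sketch):
A_fsd = Graph(layout='fsd10', strategy='spatial').A   # (3, 25, 25)
gcn_demo = unit_gcn(64, 64, A_fsd)
print(gcn_demo(paddle.randn([2, 64, 32, 25])).shape)  # [2, 64, 32, 25]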
class TCN_GCN_unit(nn.Layer):
def __init__(self,
in_channels,
out_channels,
A,
stride=1,
residual=True,
adaptive=True,
kernel_size=5,
dilations=[1, 2]):
super(TCN_GCN_unit, self).__init__()
self.gcn1 = unit_gcn(in_channels, out_channels, A, adaptive=adaptive)
self.tcn1 = MultiScale_TemporalConv(out_channels,
out_channels,
kernel_size=kernel_size,
stride=stride,
dilations=dilations,
residual=False)
self.relu = nn.ReLU()
if not residual:
self.residual = lambda x: 0
elif (in_channels == out_channels) and (stride == 1):
self.residual = lambda x: x
else:
self.residual = unit_tcn(in_channels,
out_channels,
kernel_size=1,
stride=stride)
def forward(self, x):
y = self.relu(self.tcn1(self.gcn1(x)) + self.residual(x))
return y
class CTRGCN(nn.Layer):
"""
CTR-GCN model from:
`"Channel-wise Topology Refinement Graph Convolution for Skeleton-Based Action Recognition" <https://arxiv.org/abs/2107.12213>`_
Args:
num_point: int, number of skeleton joints.
num_person: int, number of persons.
base_channel: int, model's hidden dim.
graph: str, skeleton adjacency matrix name.
graph_args: dict, skeleton adjacency graph class args.
in_channels: int, channels of vertex coordinate. 2 for (x,y), 3 for (x,y,z). Default 3.
adaptive: bool, whether the adjacency matrix is adaptive (learnable).
"""
def __init__(self,
num_point=25,
num_person=1,
base_channel=64,
graph='ntu_rgb_d',
in_channels=2,
adaptive=True):
super(CTRGCN, self).__init__()
self.graph = Graph(layout='fsd10',
strategy='spatial')
A = self.graph.A # 3,25,25
self.num_point = num_point
self.data_bn = nn.BatchNorm1D(num_person * in_channels * num_point)
self.base_channel = base_channel
self.l1 = TCN_GCN_unit(in_channels,
self.base_channel,
A,
residual=False,
adaptive=adaptive)
self.l2 = TCN_GCN_unit(self.base_channel,
self.base_channel,
A,
adaptive=adaptive)
self.l3 = TCN_GCN_unit(self.base_channel,
self.base_channel,
A,
adaptive=adaptive)
self.l4 = TCN_GCN_unit(self.base_channel,
self.base_channel,
A,
adaptive=adaptive)
self.l5 = TCN_GCN_unit(self.base_channel,
self.base_channel * 2,
A,
stride=2,
adaptive=adaptive)
self.l6 = TCN_GCN_unit(self.base_channel * 2,
self.base_channel * 2,
A,
adaptive=adaptive)
self.l7 = TCN_GCN_unit(self.base_channel * 2,
self.base_channel * 2,
A,
adaptive=adaptive)
self.l8 = TCN_GCN_unit(self.base_channel * 2,
self.base_channel * 4,
A,
stride=2,
adaptive=adaptive)
self.l9 = TCN_GCN_unit(self.base_channel * 4,
self.base_channel * 4,
A,
adaptive=adaptive)
self.l10 = TCN_GCN_unit(self.base_channel * 4,
self.base_channel * 4,
A,
adaptive=adaptive)
def init_weights(self):
bn_init(self.data_bn, 1)
def forward(self, x):
N, C, T, V, M = x.shape
x = paddle.transpose(x, perm=[0, 4, 3, 1, 2])
x = paddle.reshape(x, (N, M * V * C, T))
x = self.data_bn(x)
x = paddle.reshape(x, (N, M, V, C, T))
x = paddle.transpose(x, perm=(0, 1, 3, 4, 2))
x = paddle.reshape(x, (N * M, C, T, V))
x = self.l1(x)
x = self.l2(x)
x = self.l3(x)
x = self.l4(x)
x = self.l5(x)
x = self.l6(x)
x = self.l7(x)
x = self.l8(x)
x = self.l9(x)
x = self.l10(x)
return x, N, M
x = paddle.randn([16,2,350,25,1])
len(CTRGCN()(x))
3
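The backbone returns the tuple (feature, N, M), which is why len(...) prints 3. From my reading of the code, the feature has shape [N*M, base_channel*4, T', V], where T' is the input length reduced by the two stride-2 stages (l5 and l8), i.e. roughly T/4:
feat, N, M = CTRGCN()(paddle.randn([16, 2, 350, 25, 1]))
print(feat.shape, N, M)   # [16, 256, 88, 25] 16 1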
4.2 Model Architecture (Head)
class CTRGCNHead(nn.Layer):
"""
Head for CTR-GCN model.
Args:
in_channels: int, input feature channels. Default: 64.
num_classes: int, output the number of classes.
drop_out: float, dropout ratio of layer. Default: 0.
"""
def __init__(self, in_channels=64, num_classes=30, drop_out=0, **kwargs):
super().__init__()
self.in_channels = in_channels
self.drop_out = drop_out
self.num_classes = num_classes
self.fc = nn.Linear(self.in_channels * 4, self.num_classes)
if drop_out:
self.drop_out = nn.Dropout(self.drop_out)
else:
self.drop_out = lambda x: x
self.init_weights()
def init_weights(self):
"""Initiate the parameters.
"""
for layer in self.sublayers():
if isinstance(layer, nn.Conv2D):
weight_init_(layer.weight,
'Normal',
mean=0.0,
std=math.sqrt(2. / self.num_classes))
def forward(self, output_patch):
"""Define how the head is going to run.
"""
x, N, M = output_patch
# N*M,C,T,V
_, c_new, T, V = x.shape
x = paddle.reshape(x, shape=[N, M, c_new, T * V])
x = x.mean(3).mean(1)
x = self.drop_out(x)
return self.fc(x)
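The head averages the backbone feature over the spatio-temporal dimension T*V and then over the person dimension M before the final fully connected layer; in_channels * 4 matches the backbone's base_channel * 4 output width. A small shape check (my sketch):
head = CTRGCNHead(in_channels=64, num_classes=30)
fake_feat = paddle.randn([2 * 1, 256, 88, 25])   # N*M, C, T, V with N=2, M=1
print(head((fake_feat, 2, 1)).shape)             # [2, 30]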
4.3 Model Architecture (Framework)
class CTRGCN_framework(nn.Layer):
def __init__(self,num_classes = 30):
super().__init__()
self.backbone = CTRGCN()
self.head = CTRGCNHead(num_classes = num_classes)
def forward(self,data):
feature = self.backbone(data)
cls_score = self.head(feature)
return cls_score
x = paddle.randn([16,2,350,25,1])
CTRGCN_framework()(x).shape
[16, 30]
4.4 Dataset Preprocessing
import random
import numpy as np
class AutoPadding(object):
"""
Sample or Padding frame skeleton feature.
Args:
window_size: int, temporal size of skeleton feature.
random_pad: bool, whether to do random padding when frame length < window size. Default: False.
"""
def __init__(self, window_size, random_pad=False):
self.window_size = window_size
self.random_pad = random_pad
def get_frame_num(self, data):
C, T, V, M = data.shape
for i in range(T - 1, -1, -1):
tmp = np.sum(data[:, i, :, :])
if tmp > 0:
T = i + 1
break
return T
def __call__(self, results):
# data = results['data']
data = results
C, T, V, M = data.shape
T = self.get_frame_num(data)
if T == self.window_size:
data_pad = data[:, :self.window_size, :, :]
elif T < self.window_size:
begin = random.randint(0, self.window_size -
T) if self.random_pad else 0
data_pad = np.zeros((C, self.window_size, V, M))
data_pad[:, begin:begin + T, :, :] = data[:, :T, :, :]
else:
if self.random_pad:
index = np.random.choice(T, self.window_size,
replace=False).astype('int64')
else:
index = np.linspace(0, T-1, self.window_size).astype("int64")
data_pad = data[:, index, :, :]
# results['data'] = data_pad
# return results
return data_pad
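# A quick demonstration with toy arrays (my addition): clips with fewer valid frames than
# window_size are zero-padded, longer clips are uniformly subsampled, so the output always
# has exactly window_size frames.
autopad_demo = AutoPadding(window_size=1000)
short_clip = np.ones((2, 600, 25, 1), dtype="float32")   # C, T, V, M with only 600 frames
long_clip = np.ones((2, 2500, 25, 1), dtype="float32")   # 2500 frames
print(autopad_demo(short_clip).shape, autopad_demo(long_clip).shape)  # (2, 1000, 25, 1) (2, 1000, 25, 1)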
# take one sample out of every 7 as the validation set
train_index = []
valid_index= []
for i in range(2922):
if i%7 !=1:
train_index.append(i)
else:
valid_index.append(i)
train_index =np.array(train_index).astype("int64")
valid_index = np.array(valid_index).astype("int64")
import paddle
import numpy as np
import paddle.nn.functional as F
from visualdl import LogWriter
from tqdm import tqdm
log_writer = LogWriter("./log/gnet")
class Dataset(paddle.io.Dataset):
def __init__(self,is_train = True):
data = np.load("/home/aistudio/data/data104925/train_data.npy").astype("float32") #[2922, 3, 2500, 25, 1]
label = np.load("/home/aistudio/data/data104925/train_label.npy")
split_index = 2400
self.autopad = AutoPadding(window_size= 1000)
self.train_data = data[train_index,:,:,:,:]
self.valid_data = data[valid_index,:,:,:,:]
self.train_label = label[train_index]
self.valid_label = label[valid_index]
self.is_train = is_train
if self.is_train == True:
self.size = len(self.train_data)
else:
self.size = len(self.valid_data)
def __getitem__(self, index):
if self.is_train == True:
one_row = self.train_data[index]
one_label = self.train_label[index]
else:
one_row = self.valid_data[index]
one_label = self.valid_label[index]
one_row = one_row[:2, :, :, :]
one_row = self.autopad(one_row).astype("float32")
return one_row,one_label
def __len__(self):
return self.size
BATCH_SIZE =16
train_dataset = Dataset()
data_loader = paddle.io.DataLoader(train_dataset,batch_size=BATCH_SIZE,shuffle =True,drop_last=True)
for data in data_loader:
print(data[0].shape,data[1].shape)
break
[16, 2, 1000, 25, 1] [16]
def valid_accurary(valid_loader,classifer_net):
with paddle.set_grad_enabled(False):
acc_all = 0
num = 0
for one in valid_loader:
img_data,cls=one
# print()
out = classifer_net(img_data)
# print(out.shape)
# out = nn.Softmax()(out)
# out = paddle.multinomial(out, num_samples=1, replacement=False, name=None)
acc = paddle.metric.accuracy(out,cls.unsqueeze(1))
acc_all+=acc.numpy()[0]
num+=1
# if out[0] == cls:
# right +=1
# print("right",right)
return acc_all/num
4.5 Validation Accuracy (with my own joint-modality checkpoint)
The accuracy from my own training run is 0.60.
valid_dataset = Dataset(is_train=False)
valid_loader = paddle.io.DataLoader(valid_dataset,batch_size=16,shuffle =True,drop_last=True)
ctrgcn = CTRGCN_framework()
# agcn.set_state_dict(paddle.load("AGCN_fsd.pdparams"))
ctrgcn.set_state_dict(paddle.load("Gmodel_state0.60.pdparams"))
# print(agcn.backbone.l4.gcn1.alpha.numpy())
# print(agcn.backbone.l5.gcn1.alpha.numpy())
print("自己准确率",valid_accurary(valid_loader,ctrgcn))
自己准确率 0.6057692307692307
4.6 Training It Yourself
import paddle.nn as nn
from learning_rates import CustomWarmupAdjustDecay
ctrgcn = CTRGCN_framework()
# scheduler_G = paddle.optimizer.lr.CosineAnnealingDecay(learning_rate =0.05, T_max =60, eta_min=0, last_epoch=- 1, verbose=True)
scheduler_G = CustomWarmupAdjustDecay(step_base_lr =0.1, warmup_epochs =5, lr_decay_rate=0.1, boundaries = [35, 55],num_iters = 0.1,verbose=True)
optimizer = paddle.optimizer.Momentum(learning_rate=scheduler_G, momentum=0.9, parameters=ctrgcn.parameters(), use_nesterov=False, weight_decay=1e-4, grad_clip=None, name=None)
Epoch 0: CustomWarmupAdjustDecay set learning rate to 0.02.
import os
epoches =100
i = 0
smooth_label= True
v_acc_max = 0.
crossEntropyLoss =nn.CrossEntropyLoss(soft_label = smooth_label)
for epoch in range(epoches):
print("epoch",epoch)
for data in tqdm(data_loader):
one_data,cls=data
out = ctrgcn(one_data)
optimizer.clear_grad()
if smooth_label ==True:
labels = F.one_hot(cls, 30)
# print(labels.shape)
labels = F.label_smooth(labels, epsilon=0.1)
# print(labels.shape)
# labels = paddle.squeeze(labels, axis=1)
loss = crossEntropyLoss(out,labels)
else:
loss = crossEntropyLoss(out,cls)
loss.backward()
optimizer.step()
log_writer.add_scalar(tag='train/loss', step=i, value=loss.numpy()[0])
if i%100 == 3:
print("loss",loss.numpy()[0],v_acc_max)
i+=1
# break
if epoch%2 == 0:
ctrgcn.eval()
v_acc = valid_accurary(valid_loader,ctrgcn)
ctrgcn.train()
print("epoch loss",loss.numpy()[0],v_acc)
log_writer.add_scalar(tag='train/v_acc', step=i, value=v_acc)
if v_acc > v_acc_max:
v_acc_max = v_acc
save_param_path_model = os.path.join("model", 'Gmodel_state'+str(v_acc_max)+'.pdparams')
paddle.save(ctrgcn.state_dict(), save_param_path_model)
scheduler_G.step()
# break
5. Summary
I quite like how the multi-scale TCN splits the channels into several branches with different dilations, and the dynamic channel-wise topology was also new to me.
This article is a repost; see the original project link.