PPSIG:AAGCN动作识别
数据集为fsd-10花样滑冰,项目为AAGCN动作识别,可直接运行,acc为0.585
PPSIG:AAGCN 动作识别
论文名称:Skeleton-Based Action Recognition with Multi-Stream Adaptive Graph Convolutional Networks
In this work, we propose a novel multi-stream attention-enhanced adaptive graph convolutional neural network (MS-AAGCN) for skeleton-based action recognition.
参考pytorch项目代码:https://github.com/lshiwjx/2s-AGCN/blob/master/model/aagcn.py
论文PDF已经放在项目中
1. 概要介绍
这篇论文做的任务是基于骨骼点进行动作识别,是在AGCN-2S论文之后同作者提出的(AGCN-2S可见我的ai studio项目),核心增加了Attention Module。
在AGCN中特征为[batch_size*people,channels,时间维度,空间维度],那么在AGCN-2S中,特征经过 spatial adaptive graph convolutional layer(SAGCL)后,应该进行Temporal adaptive graph convolutional layer(TAGCL),但是在这篇论文中我们在SAGCL和TAGCL中间加入Attention Module,那么什么是Attention Module,它可以分成3个部分Spatial Attention Module(SAM) ,Temporal attention module (TAM), Channel attention module (CAM)。
这是STC Attention Module 模块示意图:
# y.shape = [batch_size*people, channels, 时间维度T, 空间维度V]
# num_jpts = A.shape[-1]
# ker_jpt = num_jpts - 1 if not num_jpts % 2 else num_jpts
# pad = (ker_jpt - 1) // 2
# rr=2
# self.conv_sa = nn.Conv1D(out_channels, 1, ker_jpt, padding=pad)
# self.conv_ta = nn.Conv1D(out_channels, 1, 9, padding=4)
# self.fc1c = nn.Linear(out_channels, out_channels // rr)
# self.fc2c = nn.Linear(out_channels // rr, out_channels)
if self.attention:
# spatial attention
se = y.mean(-2) # N C V
se1 = self.sigmoid(self.conv_sa(se))
y = y * se1.unsqueeze(-2) + y
# temporal attention
se = y.mean(-1)
se1 = self.sigmoid(self.conv_ta(se))
y = y * se1.unsqueeze(-1) + y
# channel attention
se = y.mean(-1).mean(-1)
se1 = self.relu(self.fc1c(se))
se2 = self.sigmoid(self.fc2c(se1))
y = y * se2.unsqueeze(-1).unsqueeze(-1) + y
这是基本的网络模型架构:
2. AAGCN的公式
这个是AGCN-2S公式:
第一个子图(Bk)是从数据中学习到的全局图。它表示了更适合于动作识别任务的图的拓扑结构。它由基于人体结构的图的邻接矩阵(就是AGCN-2S中的Ak)初始化。Bk的元素在训练过程中与其他参数一起进行参数化和更新。对Bk的值没有约束,这意味着该图是根据训练数据完全学习的。通过这种数据驱动的方法,该模型可以学习完全针对识别任务的图。Bk对于每一层都是独一无二的,因此对于不同层中包含的不同语义级别更加个性化。
第二个子图(Ck)是单个图,它为每个样本学习唯一的拓扑。
3. 数据集介绍:
依旧采用fsd-10花样滑冰数据集
维度符号表示 | 维度值大小 | 维度含义 | 补充说明 |
---|---|---|---|
N | 样本数 | 代表N个样本 | 无 |
C | 3 | 分别代表每个关节点的x,y坐标和置信度 | 每个x,y均被放缩至-1到1之间 |
T | 1000 | 代表动作的持续时间长度,共有1000帧 | 有的动作的实际长度可能超过1000,于是我们就抽取其中1000帧,代码见autopadding |
V | 25 | 代表25个关节点 | 具体关节点的含义可看下方的骨架示例图 |
M | 1 | 代表1个运动员个数 | 无 |
骨架示例图(注意8号索引关键点为人体中心):

4. 代码部分
# 导包
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
import numpy as np
from weight_init import weight_init_
# !unzip 2s-AGCN-paddle-main.zip
4.1 模型组网架构(Backbone部分)
4.1.1 得到A_k这个邻接矩阵,这个邻接矩阵记录人体骨骼点连接信息,然后分成3个子集
由于处理方法和ST-GCN一模一样,建议看我的PPSIG:Paddlesports ST-GCN动作识别
精彩分步可视化讲解。
def zero(x):
    """Ignore ``x`` and return the constant ``0``."""
    return 0
def iden(x):
    """Identity mapping: hand ``x`` back unchanged (same object)."""
    return x
def einsum(x, A):
    """Contract ``x`` with ``A`` over the kernel and joint axes.

    Hand-written equivalent of ``einsum('nkctv,kvw->nctw', x, A)`` built from
    ``transpose``/``reshape``/``matmul`` (the original note says
    ``paddle.einsum`` was only planned for release 2.2).

    Args:
        x: tensor laid out as ``(n, k, c, t, v)``.
        A: tensor laid out as ``(k, v, w)``.

    Returns:
        Tensor of shape ``(n, c, t, w)``.

    Raises:
        AssertionError: if the ``k``/``v`` sizes of ``x`` and ``A`` differ.
    """
    # Move the kernel axis next to the joint axis so both can be flattened.
    permuted = x.transpose((0, 2, 3, 1, 4))
    n, c, t, k, v = permuted.shape
    a_k, a_v, w = A.shape
    assert (k == a_k and v == a_v), "Args of einsum not match!"

    flat_x = permuted.reshape((n, c, t, k * v))
    flat_a = A.reshape((k * v, w))
    return paddle.matmul(flat_x, flat_a)
def get_hop_distance(num_node, edge, max_hop=1):
    """Return the pairwise hop distance between graph nodes.

    Args:
        num_node: number of nodes in the graph.
        edge: iterable of ``(i, j)`` index pairs; every pair is treated as an
            undirected edge.
        max_hop: largest hop count that is resolved.

    Returns:
        ``(num_node, num_node)`` float array holding the smallest hop count
        ``d <= max_hop`` connecting two nodes, or ``inf`` when they are not
        reachable within ``max_hop`` steps.
    """
    adjacency = np.zeros((num_node, num_node))
    for src, dst in edge:
        adjacency[dst, src] = 1
        adjacency[src, dst] = 1

    # reachable[d][i, j] is True when a walk of exactly d steps joins i and j.
    reachable = np.stack(
        [np.linalg.matrix_power(adjacency, power) for power in range(max_hop + 1)]
    ) > 0

    hop_dis = np.full((num_node, num_node), np.inf)
    # Write from the largest hop down so smaller distances overwrite larger ones.
    for hops in reversed(range(max_hop + 1)):
        hop_dis[reachable[hops]] = hops
    return hop_dis
def normalize_digraph(A):
    """Scale each column of ``A`` by the inverse of its column sum.

    Computes ``A @ D^-1`` where ``D`` is the diagonal matrix of column sums.
    Columns whose sum is not positive get a zero scale factor.

    Args:
        A: square adjacency matrix.

    Returns:
        The column-normalised matrix, same shape as ``A``.
    """
    col_sums = np.sum(A, 0)
    size = A.shape[0]
    inv_degree = np.zeros((size, size))
    positive = [idx for idx in range(size) if col_sums[idx] > 0]
    for idx in positive:
        inv_degree[idx, idx] = col_sums[idx] ** (-1)
    return np.dot(A, inv_degree)
class Graph():
    """Skeleton graph that produces the partitioned adjacency tensor ``A``.

    Args:
        layout: skeleton layout name. ``get_edge`` handles ``'fsd10'`` and
            ``'ntu-rgb+d'``; any other value (including the default
            ``'openpose'``) raises ``ValueError``.
        strategy: partition strategy. ``get_adjacency`` handles ``'spatial'``
            only; any other value (including the default ``'uniform'``)
            raises ``ValueError``.
        max_hop: largest hop distance treated as connected.
        dilation: step between the hop values that are used.

    Attributes set during construction: ``num_node``, ``edge``, ``center``,
    ``hop_dis`` and ``A``.
    """

    def __init__(self,
                 layout='openpose',
                 strategy='uniform',
                 max_hop=1,
                 dilation=1):
        self.max_hop = max_hop
        self.dilation = dilation

        self.get_edge(layout)
        self.hop_dis = get_hop_distance(self.num_node,
                                        self.edge,
                                        max_hop=max_hop)
        self.get_adjacency(strategy)

    def __str__(self):
        # __str__ must return a str; returning the ndarray itself makes
        # str(graph) / print(graph) raise TypeError.
        return str(self.A)

    def get_edge(self, layout):
        """Set ``num_node``, ``edge`` and ``center`` for ``layout``.

        Raises:
            ValueError: if ``layout`` is not ``'fsd10'`` or ``'ntu-rgb+d'``.
        """
        # edge is a list of [child, parent] pairs
        if layout == 'fsd10':
            self.num_node = 25
            self_link = [(i, i) for i in range(self.num_node)]
            neighbor_link = [(1, 8), (0, 1), (15, 0), (17, 15), (16, 0),
                             (18, 16), (5, 1), (6, 5), (7, 6), (2, 1), (3, 2),
                             (4, 3), (9, 8), (10, 9), (11, 10), (24, 11),
                             (22, 11), (23, 22), (12, 8), (13, 12), (14, 13),
                             (21, 14), (19, 14), (20, 19)]
            self.edge = self_link + neighbor_link
            self.center = 8
        elif layout == 'ntu-rgb+d':
            self.num_node = 25
            self_link = [(i, i) for i in range(self.num_node)]
            # The NTU pairs are written 1-based and shifted to 0-based below.
            neighbor_1base = [(1, 2), (2, 21), (3, 21), (4, 3), (5, 21), (6, 5),
                              (7, 6), (8, 7), (9, 21), (10, 9), (11, 10),
                              (12, 11), (13, 1), (14, 13), (15, 14), (16, 15),
                              (17, 1), (18, 17), (19, 18), (20, 19), (22, 23),
                              (23, 8), (24, 25), (25, 12)]
            neighbor_link = [(i - 1, j - 1) for (i, j) in neighbor_1base]
            self.edge = self_link + neighbor_link
            self.center = 21 - 1
        else:
            raise ValueError("Do Not Exist This Layout.")

    def get_adjacency(self, strategy):
        """Build ``self.A`` by partitioning the normalised adjacency matrix.

        For the ``'spatial'`` strategy every used hop value contributes:
        hop 0 -> one matrix (``a_root``); any other hop -> two matrices
        (``a_root + a_close`` and ``a_further``). An edge ``(j, i)`` is
        assigned by comparing the hop distance of ``j`` and ``i`` to
        ``self.center``.

        Raises:
            ValueError: if ``strategy`` is not ``'spatial'``.
        """
        valid_hop = range(0, self.max_hop + 1, self.dilation)
        adjacency = np.zeros((self.num_node, self.num_node))
        for hop in valid_hop:
            adjacency[self.hop_dis == hop] = 1
        normalize_adjacency = normalize_digraph(adjacency)

        if strategy == 'spatial':
            A = []
            for hop in valid_hop:
                a_root = np.zeros((self.num_node, self.num_node))
                a_close = np.zeros((self.num_node, self.num_node))
                a_further = np.zeros((self.num_node, self.num_node))
                for i in range(self.num_node):
                    for j in range(self.num_node):
                        if self.hop_dis[j, i] == hop:
                            dist_j = self.hop_dis[j, self.center]
                            dist_i = self.hop_dis[i, self.center]
                            if dist_j == dist_i:
                                a_root[j, i] = normalize_adjacency[j, i]
                            elif dist_j > dist_i:
                                a_close[j, i] = normalize_adjacency[j, i]
                            else:
                                a_further[j, i] = normalize_adjacency[j, i]
                if hop == 0:
                    A.append(a_root)
                else:
                    A.append(a_root + a_close)
                    A.append(a_further)
            A = np.stack(A)
            self.A = A
        else:
            raise ValueError("Do Not Exist This Strategy")
import paddle as pp
import paddle.nn as nn
import numpy as np
class unit_tcn(nn.Layer):
    """Temporal convolution unit: a ``(kernel_size, 1)`` Conv2D followed by
    BatchNorm2D.

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: channels produced by the convolution.
        kernel_size: kernel extent along the temporal axis.
        stride: stride along the temporal axis.
    """

    def __init__(self, in_channels, out_channels, kernel_size=9, stride=1):
        super().__init__()
        temporal_pad = int((kernel_size - 1) / 2)
        self.conv = nn.Conv2D(
            in_channels,
            out_channels,
            kernel_size=(kernel_size, 1),
            padding=(temporal_pad, 0),
            stride=(stride, 1),
        )
        self.bn = nn.BatchNorm2D(out_channels)
        # Registered on the layer but not applied in forward().
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply conv then batch-norm. Input size: (N*M, C, T, V)."""
        features = self.conv(x)
        return self.bn(features)
class unit_gcn(nn.Layer):
    """Spatial adaptive graph convolution layer followed by the attention
    module (spatial, temporal and channel attention applied in sequence).

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: channels of the produced feature map.
        A: numpy adjacency tensor; its first axis is indexed per subset and
            its last axis gives the number of joints.
        coff_embedding: divisor giving the embedding width
            (``out_channels // coff_embedding``).
        num_subset: number of adjacency subsets that are summed.
    """

    def __init__(self, in_channels, out_channels, A, coff_embedding=4, num_subset=3):
        super().__init__()
        self.inter_c = out_channels // coff_embedding

        # Learnable scalar; sigmoid(alpha) scales the data-dependent graph.
        alpha = self.create_parameter(
            [1], default_initializer=nn.initializer.Constant(1e-4))
        self.add_parameter("alpha", alpha)

        # Learnable graph initialised from the skeleton adjacency A.
        self.PA = paddle.create_parameter(
            shape=A.shape,
            dtype='float32',
            attr=paddle.ParamAttr(
                initializer=paddle.nn.initializer.Assign(A.astype(np.float32))),
        )
        self.num_subset = num_subset
        self.sigmoid = nn.Sigmoid()

        self.conv_a = nn.LayerList()
        self.conv_b = nn.LayerList()
        self.conv_d = nn.LayerList()
        for _ in range(self.num_subset):
            self.conv_a.append(nn.Conv2D(in_channels, self.inter_c, 1))
            self.conv_b.append(nn.Conv2D(in_channels, self.inter_c, 1))
            self.conv_d.append(nn.Conv2D(in_channels, out_channels, 1))

        # Residual path: project with 1x1 conv + BN only when widths differ.
        if in_channels == out_channels:
            self.down = lambda x: x
        else:
            self.down = nn.Sequential(
                nn.Conv2D(in_channels, out_channels, 1),
                nn.BatchNorm2D(out_channels),
            )

        self.bn = nn.BatchNorm2D(out_channels)
        self.soft = nn.Softmax(-2)
        self.relu = nn.ReLU()

        # Attention layers. The joint kernel is forced to an odd size so the
        # padding below keeps the joint axis length unchanged.
        num_jpts = A.shape[-1]
        ker_jpt = num_jpts if num_jpts % 2 else num_jpts - 1
        pad = (ker_jpt - 1) // 2
        rr = 2
        self.conv_sa = nn.Conv1D(out_channels, 1, ker_jpt, padding=pad)
        self.conv_ta = nn.Conv1D(out_channels, 1, 9, padding=4)
        self.fc1c = nn.Linear(out_channels, out_channels // rr)
        self.fc2c = nn.Linear(out_channels // rr, out_channels)
        self.attention = True

    def _apply_attention(self, y):
        """Gate ``y`` (N, C, T, V) along joints, frames and channels in turn,
        each time adding the gated features back onto the input."""
        # spatial attention: average over T -> (N, C, V)
        joint_gate = self.sigmoid(self.conv_sa(y.mean(-2)))
        y = y * joint_gate.unsqueeze(-2) + y

        # temporal attention: average over V -> (N, C, T)
        frame_gate = self.sigmoid(self.conv_ta(y.mean(-1)))
        y = y * frame_gate.unsqueeze(-1) + y

        # channel attention: average over V and T -> (N, C)
        squeezed = y.mean(-1).mean(-1)
        hidden = self.relu(self.fc1c(squeezed))
        channel_gate = self.sigmoid(self.fc2c(hidden))
        y = y * channel_gate.unsqueeze(-1).unsqueeze(-1) + y
        return y

    def forward(self, x):
        """Run the adaptive graph convolution on ``x`` of shape (N, C, T, V)."""
        N, C, T, V = x.shape
        adjacency = self.PA

        y = None
        for k in range(self.num_subset):
            embed_a = self.conv_a[k](x)
            query = pp.transpose(embed_a, perm=[0, 3, 1, 2]).reshape(
                [N, V, self.inter_c * T])
            key = self.conv_b[k](x).reshape([N, self.inter_c * T, V])
            # Data-dependent V x V graph, normalised by the embedding length.
            learned = self.soft(pp.matmul(query, key) / query.shape[-1])
            topology = learned * self.sigmoid(self.alpha) + adjacency[k]
            flat_x = x.reshape([N, C * T, V])
            z = self.conv_d[k](
                pp.matmul(flat_x, topology).reshape([N, C, T, V]))
            y = z + y if y is not None else z

        y = self.bn(y)
        y += self.down(x)
        y = self.relu(y)

        if self.attention:
            y = self._apply_attention(y)
        return y
class TCN_GCN_unit(nn.Layer):
    """One backbone block: ``unit_gcn`` -> ``unit_tcn`` plus a residual
    branch, followed by ReLU.

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: channels of the produced feature map.
        A: adjacency tensor forwarded to ``unit_gcn``.
        stride: temporal stride of the ``unit_tcn``.
        residual: when False the residual branch contributes ``0``.
    """

    def __init__(self, in_channels, out_channels, A, stride=1, residual=True):
        super().__init__()
        self.gcn1 = unit_gcn(in_channels, out_channels, A)
        self.tcn1 = unit_tcn(out_channels, out_channels, stride=stride)
        self.relu = nn.ReLU()

        shape_preserved = (in_channels == out_channels) and (stride == 1)
        if residual and shape_preserved:
            # Identity shortcut.
            self.residual = lambda x: x
        elif residual:
            # Shapes differ: match them with a kernel-size-1 unit_tcn.
            self.residual = unit_tcn(
                in_channels, out_channels, kernel_size=1, stride=stride)
        else:
            self.residual = lambda x: 0

    def forward(self, x):
        """Return ``relu(tcn(gcn(x)) + residual(x))``."""
        main_branch = self.tcn1(self.gcn1(x))
        shortcut = self.residual(x)
        return self.relu(main_branch + shortcut)
class AGCN(nn.Layer):
    """AAGCN backbone: input batch-norm, ten ``TCN_GCN_unit`` blocks and a
    global average pool.

    Args:
        num_class: output width of ``self.fc``. ``forward`` does not apply
            ``self.fc``; the layer is still created so the set of parameters
            owned by this module is unchanged.
        num_point: number of joints (V) used to size the input batch-norm.
        num_person: number of persons (M) used to size the input batch-norm.
        graph_args: optional dict of keyword arguments for ``Graph``. Given
            keys override the defaults ``layout='fsd10'`` and
            ``strategy='spatial'``. ``None`` or ``{}`` keeps those defaults.
            (Previously this argument was accepted but ignored.)
        in_channels: number of input channels (C).
    """

    def __init__(self, num_class=30, num_point=25, num_person=1, graph_args=None, in_channels=2):
        super(AGCN, self).__init__()
        graph_kwargs = {'layout': 'fsd10', 'strategy': 'spatial'}
        if graph_args:
            graph_kwargs.update(graph_args)
        self.graph = Graph(**graph_kwargs)
        A = self.graph.A

        self.data_bn = nn.BatchNorm1D(num_person * in_channels * num_point)
        self.l1 = TCN_GCN_unit(in_channels, 64, A, residual=False)
        self.l2 = TCN_GCN_unit(64, 64, A)
        self.l3 = TCN_GCN_unit(64, 64, A)
        self.l4 = TCN_GCN_unit(64, 64, A)
        self.l5 = TCN_GCN_unit(64, 128, A, stride=2)
        self.l6 = TCN_GCN_unit(128, 128, A)
        self.l7 = TCN_GCN_unit(128, 128, A)
        self.l8 = TCN_GCN_unit(128, 256, A, stride=2)
        self.l9 = TCN_GCN_unit(256, 256, A)
        self.l10 = TCN_GCN_unit(256, 256, A)
        self.pool = nn.AdaptiveAvgPool2D(output_size=(1, 1))
        self.fc = nn.Linear(256, num_class)

    def forward(self, x):
        """Map ``x`` of shape (N, C, T, V, M) to pooled features.

        Returns:
            Tensor of shape (N*M, 256, 1, 1).
        """
        N, C, T, V, M = x.shape
        # Fold persons, joints and channels into one axis for BatchNorm1D.
        x = x.transpose([0, 4, 3, 1, 2]).reshape_([N, M * V * C, T])
        x = self.data_bn(x)
        # Unfold again and lay the data out as (N*M, C, T, V).
        x = x.reshape_([N, M, V, C, T]).transpose([0, 1, 3, 4, 2]).reshape_([N * M, C, T, V])

        x = self.l1(x)
        x = self.l2(x)
        x = self.l3(x)
        x = self.l4(x)
        x = self.l5(x)
        x = self.l6(x)
        x = self.l7(x)
        x = self.l8(x)
        x = self.l9(x)
        x = self.l10(x)
        x = self.pool(x)
        return x
import paddle

# Smoke test: push a random (N, C, T, V, M) batch through the backbone and
# display the shape of the result.
x = paddle.randn(shape=[16, 2, 350, 25, 1])
backbone_out = AGCN()(x)
backbone_out.shape
W0815 18:25:47.193257 3656 gpu_resources.cc:61] Please NOTE: device: 0, GPU Compute Capability: 7.0, Driver API Version: 11.2, Runtime API Version: 10.1
W0815 18:25:47.197376 3656 gpu_resources.cc:91] device: 0, cuDNN Version: 7.6.
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/nn/layer/norm.py:654: UserWarning: When training, we now always track global mean and variance.
"When training, we now always track global mean and variance.")
[16, 256, 1, 1]
4.2 模型组网架构(Head部分)
class STGCNHead(nn.Layer):
    """Classification head: a 1x1 Conv2D whose output is flattened per sample.

    Args:
        in_channels: int, input feature channels. Default: 256.
        num_classes: int, number of classes. Default: 10.
        **kwargs: accepted and ignored.
    """

    def __init__(self, in_channels=256, num_classes=10, **kwargs):
        super().__init__()
        self.fcn = nn.Conv2D(
            in_channels=in_channels,
            out_channels=num_classes,
            kernel_size=1,
        )

    def init_weights(self):
        """Re-initialise every Conv2D sublayer with ``weight_init_``
        ('Normal', std=0.02)."""
        conv_layers = (
            sub for sub in self.sublayers() if isinstance(sub, nn.Conv2D))
        for conv in conv_layers:
            weight_init_(conv, 'Normal', std=0.02)

    def forward(self, x):
        """Return class scores reshaped from (N, C, 1, 1) to (N, C)."""
        scores = self.fcn(x)
        batch = scores.shape[0]
        return paddle.reshape_(scores, (batch, -1))
4.3 模型组网架构(framework部分)
class AGCN_framework(nn.Layer):
    """Full classifier: ``AGCN`` backbone followed by ``STGCNHead``.

    Args:
        num_classes: number of classes produced by the head.
    """

    def __init__(self, num_classes=30):
        super().__init__()
        self.backbone = AGCN()
        self.head = STGCNHead(num_classes=num_classes)

    def forward(self, data):
        """Return the class scores for the input batch ``data``."""
        return self.head(self.backbone(data))
# Smoke test: the full framework should produce one score vector per sample.
x = paddle.randn(shape=[16, 2, 350, 25, 1])
framework_out = AGCN_framework()(x)
framework_out.shape
[16, 30]
4.5 数据Dataset预处理
import numpy as np

# Display the shape of the first training sample.
train_data_path = "/home/aistudio/data/data104925/train_data.npy"
np.load(train_data_path)[0].shape
(3, 2500, 25, 1)
import numpy as np
class AutoPadding(object):
    """Sample or pad a skeleton clip to a fixed number of frames.

    Args:
        window_size: int, temporal size of the returned clip.
        random_pad: bool. When the clip is shorter than ``window_size`` the
            frames are placed at a random offset instead of offset 0; when it
            is longer, frames are drawn at random instead of evenly spaced.
            Default: False.
    """

    def __init__(self, window_size, random_pad=False):
        self.window_size = window_size
        self.random_pad = random_pad

    def get_frame_num(self, data):
        """Return the number of frames up to and including the last frame
        that holds any non-zero value.

        A frame is empty only when every value in it is zero. Testing
        ``sum > 0`` would wrongly discard frames whose coordinates sum to a
        non-positive number. If every frame is empty the full length is
        returned.

        Args:
            data: array of shape (C, T, V, M).
        """
        C, T, V, M = data.shape
        occupied = np.flatnonzero(np.any(data != 0, axis=(0, 2, 3)))
        if occupied.size:
            return int(occupied[-1]) + 1
        return T

    def __call__(self, results):
        """Return ``results`` resized to ``window_size`` frames along axis 1.

        Args:
            results: array of shape (C, T, V, M).

        Returns:
            Array of shape (C, window_size, V, M).
        """
        data = results

        C, T, V, M = data.shape
        T = self.get_frame_num(data)
        if T == self.window_size:
            data_pad = data[:, :self.window_size, :, :]
        elif T < self.window_size:
            # np.random.randint excludes the upper bound, hence the +1 so the
            # clip may also sit flush with the end of the window. The numpy
            # RNG is used because ``random`` is not imported in this file.
            begin = (np.random.randint(0, self.window_size - T + 1)
                     if self.random_pad else 0)
            data_pad = np.zeros((C, self.window_size, V, M))
            data_pad[:, begin:begin + T, :, :] = data[:, :T, :, :]
        else:
            if self.random_pad:
                # NOTE(review): the drawn indices are not sorted, so frames
                # come out in random temporal order - confirm this is wanted.
                index = np.random.choice(T, self.window_size,
                                         replace=False).astype('int64')
            else:
                index = np.linspace(0, T - 1, self.window_size).astype("int64")
            data_pad = data[:, index, :, :]

        return data_pad
label = np.load("/home/aistudio/data/data104925/train_label.npy")
print(label.shape)

# Write every label on its own line so the class distribution can be inspected.
with open("preds.txt", "w") as f:
    f.writelines(str(int(value)) + "\n" for value in label)
(2922,)
# Hold out one sample in every seven (indices with i % 7 == 1) for validation.
all_index = range(2922)
valid_index = np.array([i for i in all_index if i % 7 == 1]).astype("int64")
train_index = np.array([i for i in all_index if i % 7 != 1]).astype("int64")
# Adapted from:
# https://github.com/PaddlePaddle/PaddleVideo/blob/2f10ee26e232b83809ac4f11ac76b30fa262a86e/paddlevideo/loader/pipelines/skeleton_pipeline.py
# The default is joint mode (raw joint coordinates). With joint=False and
# bone=True the bone vectors (length and direction) are used instead.
class SketeonModalityTransform(object):
    """Convert joint coordinates into the bone and/or motion modality.

    Args:
        bone: bool, replace every joint by the vector from its parent joint
            (only consulted when ``joint`` is False).
        motion: bool, replace every frame by the difference to the next frame
            (only consulted when ``joint`` is False).
        joint: bool, when True the input is returned untouched.
        graph: str, skeleton layout; ``'ntu_rgb_d'`` or ``'fsd10'``.

    Raises:
        NotImplementedError: for any other ``graph`` value.
    """

    def __init__(self, bone = False, motion = False, joint=True, graph='fsd10'):
        self.joint = joint
        self.bone = bone
        self.motion = motion
        self.graph = graph
        if self.graph == "ntu_rgb_d":
            # NTU pairs are 1-based joint numbers.
            self.bone_pairs = ((1, 2), (2, 21), (3, 21), (4, 3), (5, 21),
                               (6, 5), (7, 6), (8, 7), (9, 21), (10, 9),
                               (11, 10), (12, 11), (13, 1), (14, 13), (15, 14),
                               (16, 15), (17, 1), (18, 17), (19, 18), (20, 19),
                               (22, 23), (21, 21), (23, 8), (24, 25), (25, 12))
            self._index_base = 1
        elif self.graph == 'fsd10':
            self.num_node = 25
            # fsd10 pairs are already 0-based array indices (joint 0 appears
            # and the largest index is 24), so no offset may be subtracted.
            self.bone_pairs = [(1, 8), (0, 1), (15, 0), (17, 15), (16, 0),
                               (18, 16), (5, 1), (6, 5), (7, 6), (2, 1), (3, 2),
                               (4, 3), (9, 8), (10, 9), (11, 10), (24, 11),
                               (22, 11), (23, 22), (12, 8), (13, 12), (14, 13),
                               (21, 14), (19, 14), (20, 19)]
            self._index_base = 0
        else:
            raise NotImplementedError

    def __call__(self, results):
        """Transform ``results`` (joints on axis 2, frames on axis 1).

        The input array is never modified; a new array is returned whenever
        a bone or motion transform is applied.
        """
        if self.joint:
            return results
        data_numpy = results
        if self.bone:
            base = self._index_base
            bone_data_numpy = np.zeros_like(data_numpy)
            for v1, v2 in self.bone_pairs:
                child, parent = v1 - base, v2 - base
                bone_data_numpy[:, :, child] = (
                    data_numpy[:, :, child] - data_numpy[:, :, parent])
            data_numpy = bone_data_numpy
        if self.motion:
            if data_numpy is results:
                # Work on a copy so the caller's array is left untouched.
                data_numpy = data_numpy.copy()
            data_numpy[:, :-1] = data_numpy[:, 1:] - data_numpy[:, :-1]
            data_numpy[:, -1] = 0
        results = data_numpy
        return results
import paddle
import numpy as np
import paddle.nn.functional as F
from visualdl import LogWriter
from tqdm import tqdm
log_writer = LogWriter("./log/gnet")
class Dataset(paddle.io.Dataset):
    """fsd-10 skeleton dataset split into train/validation subsets.

    The split uses the module-level ``train_index`` / ``valid_index`` arrays.
    Each sample keeps only its first two channels, is resized to 1000 frames
    by ``AutoPadding`` and passed through ``SketeonModalityTransform``
    configured for joint mode.

    Args:
        is_train: serve the training subset when equal to True, otherwise
            the validation subset.
    """

    def __init__(self, is_train=True):
        # Raw array is [2922, 3, 2500, 25, 1].
        data = np.load("/home/aistudio/data/data104925/train_data.npy").astype("float32")
        label = np.load("/home/aistudio/data/data104925/train_label.npy")

        self.autopad = AutoPadding(window_size=1000)
        self.train_data = data[train_index]
        self.valid_data = data[valid_index]
        self.train_label = label[train_index]
        self.valid_label = label[valid_index]

        self.is_train = is_train
        active_split = self.train_data if self.is_train == True else self.valid_data
        self.size = len(active_split)
        self.sketeon = SketeonModalityTransform(bone=False, motion=False, joint=True)

    def __getitem__(self, index):
        """Return ``(features, label)`` for sample ``index`` of the active split."""
        if self.is_train == True:
            samples, labels = self.train_data, self.train_label
        else:
            samples, labels = self.valid_data, self.valid_label

        one_label = labels[index]
        # Keep only the first two channels of the sample.
        one_row = samples[index][:2, :, :, :]
        one_row = self.autopad(one_row).astype("float32")
        one_row = self.sketeon(one_row)
        return one_row, one_label

    def __len__(self):
        return self.size
# Number of samples in the training split.
train_split_size = len(Dataset())
print(train_split_size)
2504
# Peek at the first sample only: show the feature dtype and its label.
for i in Dataset():
    features, target = i[0], i[1]
    print(features.dtype, target)
    break
float32 27
BATCH_SIZE = 16
train_dataset = Dataset()
data_loader = paddle.io.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=True,
)

# Show the shapes of the first batch as a sanity check.
for data in data_loader:
    batch_features, batch_labels = data[0], data[1]
    print(batch_features.shape, batch_labels.shape)
    break
[16, 2, 1000, 25, 1] [16]
def valid_accurary(valid_loader, classifer_net):
    """Return the mean of the per-batch accuracies over ``valid_loader``.

    Gradients are disabled for the duration of the pass. The network's
    train/eval mode is not changed here; the caller is responsible for it.

    Args:
        valid_loader: iterable yielding ``(inputs, labels)`` batches.
        classifer_net: callable mapping ``inputs`` to class scores.
    """
    with paddle.set_grad_enabled(False):
        acc_total = 0
        batch_count = 0
        for img_data, cls in valid_loader:
            scores = classifer_net(img_data)
            # accuracy is given the labels with an extra trailing axis.
            batch_acc = paddle.metric.accuracy(scores, cls.unsqueeze(1))
            acc_total += batch_acc.numpy()[0]
            batch_count += 1
        return acc_total / batch_count
4.6 测试验证集准确率(我自己joint模式的参数文件)
我自己训练的准确率为0.585
# Build the validation set and loader once; building them a second time only
# reloads the same arrays from disk.
valid_dataset = Dataset(is_train=False)
valid_loader = paddle.io.DataLoader(valid_dataset,batch_size=16,shuffle =True,drop_last=True)

agcn = AGCN_framework()
# Alternative checkpoint: agcn.set_state_dict(paddle.load("AGCN_fsd.pdparams"))
agcn.set_state_dict(paddle.load("Gmodel_state0.588.pdparams"))
# Switch to evaluation mode before measuring accuracy, as the training loop
# does, so BatchNorm layers are not run in training mode during validation.
agcn.eval()

print(agcn.backbone.l4.gcn1.alpha.numpy())
print(agcn.backbone.l5.gcn1.alpha.numpy())
print("自己准确率",valid_accurary(valid_loader,agcn))
[0.27299923]
[0.6008839]
自己准确率 0.5841346153846154
4.7 自己进行训练
import math
from paddle.optimizer.lr import *
import numpy as np
class CustomWarmupAdjustDecay(LRScheduler):
    r"""
    Linear warmup followed by step-wise decay at fixed epoch boundaries.

    During warmup (``last_epoch < warmup_epochs``) the rate is
    ``step_base_lr * (last_epoch + 1) / warmup_epochs``. Afterwards it is
    ``step_base_lr * lr_decay_rate ** k`` where ``k`` is the number of
    ``boundaries`` already reached.

    Args:
        step_base_lr (float): base learning rate reached at the end of warmup.
        warmup_epochs (int): the number of warmup epochs.
        lr_decay_rate (float|int): factor applied once per reached boundary.
        boundaries (list): epochs at which the rate is decayed.
        num_iters (int, optional): number of ``step()`` calls that make up one
            epoch. ``None`` is treated as 1 (one ``step()`` per epoch).
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
        verbose (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False`` .
    Returns:
        ``CustomWarmupAdjustDecay`` instance to schedule learning rate.
    """

    def __init__(self,
                 step_base_lr,
                 warmup_epochs,
                 lr_decay_rate,
                 boundaries,
                 num_iters=None,
                 last_epoch=-1,
                 verbose=False):
        self.step_base_lr = step_base_lr
        self.warmup_epochs = warmup_epochs
        self.lr_decay_rate = lr_decay_rate
        self.boundaries = boundaries
        self.num_iters = num_iters
        # The base class calls step() here, which updates
        # last_lr/last_epoch, so every attribute above must already be set.
        super(CustomWarmupAdjustDecay, self).__init__(last_epoch=last_epoch,
                                                      verbose=verbose)

    def step(self, epoch=None):
        """
        ``step`` should be called after ``optimizer.step`` . It will update the learning rate in optimizer according to current ``epoch`` .
        The new learning rate will take effect on next ``optimizer.step`` .
        Args:
            epoch (int, None): specify current epoch. Default: None. Auto-increment from last_epoch=-1.
        Returns:
            None
        """
        if epoch is None:
            if self.last_epoch == -1:
                self.last_epoch += 1
            else:
                # Advance by a fraction of an epoch per call. A missing
                # num_iters means one call per epoch instead of a TypeError
                # from dividing by None.
                iters_per_epoch = 1 if self.num_iters is None else self.num_iters
                self.last_epoch += 1 / iters_per_epoch
        else:
            self.last_epoch = epoch

        self.last_lr = self.get_lr()

        if self.verbose:
            print('Epoch {}: {} set learning rate to {}.'.format(
                self.last_epoch, self.__class__.__name__, self.last_lr))

    def get_lr(self):
        """Return the learning rate for the current ``last_epoch``."""
        if self.last_epoch < self.warmup_epochs:
            lr = self.step_base_lr * (self.last_epoch + 1) / self.warmup_epochs
        else:
            # The exponent is the count of boundaries that have been reached.
            lr = self.step_base_lr * (self.lr_decay_rate**np.sum(
                self.last_epoch >= np.array(self.boundaries)))
        return lr
import paddle.nn as nn

agcn = AGCN_framework()
crossEntropyLoss = nn.CrossEntropyLoss()

# Alternative schedule that was tried:
# paddle.optimizer.lr.CosineAnnealingDecay(learning_rate=0.05, T_max=60, eta_min=0, last_epoch=-1, verbose=False)
scheduler_G = CustomWarmupAdjustDecay(
    step_base_lr=0.1,
    warmup_epochs=5,
    lr_decay_rate=0.1,
    boundaries=[30, 40],
    num_iters=1,
    verbose=True,
)
optimizer = paddle.optimizer.Momentum(
    learning_rate=scheduler_G,
    momentum=0.9,
    parameters=agcn.parameters(),
    use_nesterov=False,
    weight_decay=1e-4,
    grad_clip=None,
    name=None,
)
Epoch 0: CustomWarmupAdjustDecay set learning rate to 0.02.
import os

epoches = 70
i = 0  # global batch counter, used as the logging step
v_acc_max = 0
for epoch in range(epoches):
    print("epoch", epoch)
    for one_data, cls in tqdm(data_loader):
        out = agcn(one_data)
        optimizer.clear_grad()
        loss = crossEntropyLoss(out, cls)
        loss.backward()
        optimizer.step()

        log_writer.add_scalar(tag='train/loss', step=i, value=loss.numpy()[0])
        if i % 100 == 3:
            print("loss", loss.numpy()[0], v_acc_max)
        i += 1

    # Validate every second epoch and keep the best checkpoint so far.
    if epoch % 2 == 0:
        agcn.eval()
        v_acc = valid_accurary(valid_loader, agcn)
        agcn.train()
        print("epoch loss", loss.numpy()[0], v_acc)
        log_writer.add_scalar(tag='train/v_acc', step=i, value=v_acc)
        if v_acc > v_acc_max:
            v_acc_max = v_acc
            checkpoint_name = 'Gmodel_state' + str(v_acc_max) + '.pdparams'
            save_param_path_model = os.path.join("model", checkpoint_name)
            paddle.save(agcn.state_dict(), save_param_path_model)

    # One scheduler step per epoch (the scheduler was built with num_iters=1).
    scheduler_G.step()
5. 总结:
基于骨骼点进行动作识别输入的是骨骼点信息输出的是类别,从ST-GCN到AGCN-2S,再到这个AAGCN,每一次的改进都是循序渐进的。AGCN-2S让模型学习建立了不单单是已有边的信息,AAGCN这个拓扑结构更加自由的学习,其中AAGCN的公式用Ak给Bk进行初始化很不错,并且添加了attention。
此文章为搬运
原项目链接
更多推荐
所有评论(0)