【AI达人创造营第二期】Human-Computer Interaction with PaddlePaddle + OpenVINO
Reposted from AI Studio
Project link: https://aistudio.baidu.com/aistudio/projectdetail/3525813
Project Overview
This project is adapted from HandPose_x. By combining PaddlePaddle with OpenVINO it covers the whole pipeline from training to rapid deployment. There is still plenty of room for extension, and contributions are welcome.
The project detects and tracks hand keypoints to work out the hand's relative position in the real world. That lets it crop regions of the real-world scene, pass them to the computer for analysis, and thereby interact with the user. Features implemented so far: image classification, text extraction (OCR) from a selected region, and drawing.
Training
From the requirements above, the base modules are ① hand detection and ② hand keypoint detection; the extended features additionally need ③ classification and ④ OCR.
Object Detection
Dataset
The dataset is a merge of TV-Hand and COCO-Hand (the COCO-Hand-Big portion). The official homepage for both datasets: [link]. The labels come in YOLO (txt) format, so they first need to be converted to VOC (XML) format.
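To make the conversion concrete, here is the box math for a single made-up YOLO label line (the numbers are illustrative only): YOLO stores a normalized center and size, while VOC stores pixel corners.

# hypothetical label line for a 640x480 image: "hand 0.50 0.40 0.20 0.30"
cx, cy, bw, bh = 0.50, 0.40, 0.20, 0.30
W, H = 640, 480
xmin = int((cx - bw / 2) * W)  # 256
ymin = int((cy - bh / 2) * H)  # 120
xmax = int((cx + bw / 2) * W)  # 384
ymax = int((cy + bh / 2) * H)  # 264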
!mkdir -p train/label
!mv datasets_TVCOCO_hand_train/anno/images train/
from xml.dom.minidom import Document
import os
import cv2
from tqdm import tqdm

def writeXml(tmp, imgname, w, h, objbud):
    """Write one VOC-format XML file; objbud is a flat list of
    [name, xmin, ymin, xmax, ymax] groups, one group per box."""
    doc = Document()
    # root node
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)
    # folder / filename
    folder = doc.createElement('folder')
    annotation.appendChild(folder)
    folder_txt = doc.createTextNode("train")
    folder.appendChild(folder_txt)
    filename = doc.createElement('filename')
    annotation.appendChild(filename)
    filename_txt = doc.createTextNode(imgname)
    filename.appendChild(filename_txt)
    # source
    source = doc.createElement('source')
    annotation.appendChild(source)
    database = doc.createElement('database')
    source.appendChild(database)
    database_txt = doc.createTextNode("Unknown")
    database.appendChild(database_txt)
    # image size
    size = doc.createElement('size')
    annotation.appendChild(size)
    width = doc.createElement('width')
    size.appendChild(width)
    width_txt = doc.createTextNode(str(w))
    width.appendChild(width_txt)
    height = doc.createElement('height')
    size.appendChild(height)
    height_txt = doc.createTextNode(str(h))
    height.appendChild(height_txt)
    depth = doc.createElement('depth')
    size.appendChild(depth)
    depth_txt = doc.createTextNode("3")
    depth.appendChild(depth_txt)
    segmented = doc.createElement('segmented')
    annotation.appendChild(segmented)
    segmented_txt = doc.createTextNode("0")
    segmented.appendChild(segmented_txt)
    # one <object> node per bounding box
    for i in range(0, int(len(objbud) / 5)):
        object_new = doc.createElement("object")
        annotation.appendChild(object_new)
        name = doc.createElement('name')
        object_new.appendChild(name)
        name_txt = doc.createTextNode(objbud[i * 5])
        name.appendChild(name_txt)
        pose = doc.createElement('pose')
        object_new.appendChild(pose)
        pose_txt = doc.createTextNode("Unspecified")
        pose.appendChild(pose_txt)
        truncated = doc.createElement('truncated')
        object_new.appendChild(truncated)
        truncated_txt = doc.createTextNode("0")
        truncated.appendChild(truncated_txt)
        difficult = doc.createElement('difficult')
        object_new.appendChild(difficult)
        difficult_txt = doc.createTextNode("0")
        difficult.appendChild(difficult_txt)
        # bounding box corners
        bndbox = doc.createElement('bndbox')
        object_new.appendChild(bndbox)
        xmin = doc.createElement('xmin')
        bndbox.appendChild(xmin)
        xmin_txt = doc.createTextNode(str(objbud[i * 5 + 1]))
        xmin.appendChild(xmin_txt)
        ymin = doc.createElement('ymin')
        bndbox.appendChild(ymin)
        ymin_txt = doc.createTextNode(str(objbud[i * 5 + 2]))
        ymin.appendChild(ymin_txt)
        xmax = doc.createElement('xmax')
        bndbox.appendChild(xmax)
        xmax_txt = doc.createTextNode(str(objbud[i * 5 + 3]))
        xmax.appendChild(xmax_txt)
        ymax = doc.createElement('ymax')
        bndbox.appendChild(ymax)
        ymax_txt = doc.createTextNode(str(objbud[i * 5 + 4]))
        ymax.appendChild(ymax_txt)
    tempfile = tmp + imgname.split(".")[0] + ".xml"
    with open(tempfile, "w") as f:
        doc.writexml(f, indent='', addindent='\t', newl='\n', encoding='utf-8')
    return
image_path = "./train/images/"
txt_label_path = "./datasets_TVCOCO_hand_train/anno/labels/"
xml_label_path = "./train/label/"
image_name = os.listdir(image_path)
for name in tqdm(image_name):
    if ".jpg" in name:
        image = cv2.imread(os.path.join(image_path, name))
        height, width, _ = image.shape
        txt_path = os.path.join(txt_label_path, name.split(".")[0] + ".txt")
        obj1 = []
        obj2 = []
        with open(txt_path, "r") as f:
            data = f.readlines()
        for line in data:
            for line_data in line.split("\n")[0].split(" "):
                obj1.append(line_data)
        # YOLO stores (label, cx, cy, w, h) normalized to [0, 1];
        # convert to pixel corners and drop boxes that leave the image
        for i in range(int(len(obj1) / 5)):
            x1 = float(obj1[i * 5 + 1]) - 0.5 * float(obj1[i * 5 + 3])
            y1 = float(obj1[i * 5 + 2]) - 0.5 * float(obj1[i * 5 + 4])
            x2 = float(obj1[i * 5 + 1]) + 0.5 * float(obj1[i * 5 + 3])
            y2 = float(obj1[i * 5 + 2]) + 0.5 * float(obj1[i * 5 + 4])
            if min(x1, y1, x2, y2) < 0:
                continue
            obj2.append(obj1[i * 5])
            obj2.append(int(x1 * width))
            obj2.append(int(y1 * height))
            obj2.append(int(x2 * width))
            obj2.append(int(y2 * height))
        if len(obj2) != 0:
            writeXml(xml_label_path, name, width, height, obj2)
Build the train/test lists (one image path and one XML path per line):
import os

image_dir = "train/images/"
xml_dir = "train/label/"
xml_path = os.listdir(xml_dir)
f_train = open("train/train.txt", "w")
f_test = open("train/test.txt", "w")
# every 100th sample goes to the test list, the rest to the train list
for i in range(len(xml_path)):
    if (i % 100) != 0:
        f_train.write(image_dir + xml_path[i].split(".")[0] + ".jpg " + xml_dir + xml_path[i] + "\n")
    else:
        f_test.write(image_dir + xml_path[i].split(".")[0] + ".jpg " + xml_dir + xml_path[i] + "\n")
f_train.close()
f_test.close()
Training the detector with PaddleDetection
PaddleDetection is an end-to-end object detection suite built on PaddlePaddle. It offers mainstream detection, instance segmentation, tracking, and keypoint algorithms; configurable network modules, data augmentation strategies, and loss functions; plus industrial-grade SOTA models for server and mobile, with built-in model compression and cross-platform high-performance deployment, so developers can finish the full pipeline faster.
I initially trained a PicoDet model with PaddleDetection (the PaddleDetection docs do state that PicoDet supports OpenVINO), but when I actually tried the OpenVINO deployment I ran into missing operators and had to give it up. The OpenVINO documentation, however, has sample code for PaddlePaddle YOLOv3/PP-YOLO models, so for this first attempt I chose YOLOv3, which in the end deployed successfully through OpenVINO. If you want to train and deploy PicoDet, you can fall back on Paddle's native inference engine (Paddle Inference); that inference code is given later as well. Still, early OpenVINO support would be very welcome: plugging in an Intel Neural Compute Stick gives a speedup that would let a lightweight model like PicoDet really shine on edge devices.
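If you want to check operator coverage yourself before committing to a model, one approach (a sketch; run it once you have an ONNX export, as described in the export section below) is to list the operator types the ONNX graph uses and compare them against what your OpenVINO release supports:

import onnx
from collections import Counter

model = onnx.load("yolo.onnx")  # or the PicoDet export you want to vet
ops = Counter(node.op_type for node in model.graph.node)
for op, n in sorted(ops.items()):
    print(op, n)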
!git clone https://gitee.com/paddlepaddle/PaddleDetection.git
!pip install -r PaddleDetection/requirements.txt
%env CUDA_VISIBLE_DEVICES=0
!python PaddleDetection/tools/train.py -c config/yolo.yml --eval
Training the detector with PaddleX
Alternatively, you can train with PaddleX, which wraps a subset of PaddleDetection's models. PaddleX makes model training even simpler and more convenient, which makes it a good choice for beginners.
!pip install paddlex==2.1.0
import paddlex as pdx
from paddlex import transforms as T

train_transforms = T.Compose([
    T.MixupImage(mixup_epoch=250), T.RandomDistort(),
    T.RandomExpand(im_padding_value=[123.675, 116.28, 103.53]), T.RandomCrop(),
    T.RandomHorizontalFlip(), T.BatchRandomResize(
        target_sizes=[320, 352, 384, 416, 448, 480, 512, 544, 576, 608],
        interp='RANDOM'),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
eval_transforms = T.Compose([
    T.Resize(608, interp='CUBIC'),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
train_dataset = pdx.datasets.VOCDetection(
    data_dir='./',
    file_list='train/train.txt',
    label_list='train/labels.txt',
    transforms=train_transforms,
    shuffle=True)
eval_dataset = pdx.datasets.VOCDetection(
    data_dir='./',
    file_list='train/test.txt',
    label_list='train/labels.txt',
    transforms=eval_transforms,
    shuffle=False)
num_classes = len(train_dataset.labels)
model = pdx.det.YOLOv3(num_classes=num_classes, backbone='MobileNetV3',
                       nms_topk=500, nms_keep_topk=50)
model.train(
    num_epochs=270,
    train_dataset=train_dataset,
    train_batch_size=32,
    eval_dataset=eval_dataset,
    pretrain_weights='COCO',
    learning_rate=.001,
    warmup_steps=1000,
    warmup_start_lr=0.0,
    save_interval_epochs=5,
    lr_decay_epochs=[70, 140, 210],
    use_ema=True,
    save_dir='output/yolo')
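Once training finishes, the saved model can be loaded back for a quick smoke test. A sketch (output/yolo/best_model is where model.train above writes its best checkpoint; test.jpg is a placeholder path):

import paddlex as pdx

model = pdx.load_model('output/yolo/best_model')
result = model.predict('test.jpg')  # list of dicts: category, bbox, score
pdx.det.visualize('test.jpg', result, threshold=0.5, save_dir='./vis')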
Hand Keypoints
HandPose_x
You can use the PaddlePaddle implementation of HandPose_x. Download the trained model from that project, then convert it in the inference-model export step below.
PaddleHub
PaddleHub hosts a large number of pretrained models from the PaddlePaddle ecosystem, with model management and one-line prediction. Combined with its Fine-tune API, you can quickly run transfer learning on top of large pretrained models and adapt them to your own scenario. It includes a ready-made hand keypoint detection model, hand_pose_localization, which you can download directly (and deploy as-is).
You can fetch the model with hub install; the install log prints where the model was saved, and you can copy it out from there. A quick usage check is sketched after the install command below.
Of course, this model was contributed by Xiao (肖佬), so you can also take it from his project: [link]
!hub install hand_pose_localization==1.0.1
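To sanity-check the module before digging its files out, a minimal sketch (keypoint_detection is the module's documented interface; adjust if your version differs, and test/left.jpg is a placeholder path):

import cv2
import paddlehub as hub

model = hub.Module(name="hand_pose_localization")
# one list of 21 keypoints per input image
result = model.keypoint_detection(images=[cv2.imread("test/left.jpg")])
print(result)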
Exporting Models for OpenVINO and Paddle Inference
Object detection
PaddleDetection
The export steps follow the PaddleDetection deployment-model export tutorial.
This produces both the Paddle inference model and an ONNX model (the latter for OpenVINO deployment).
!pip install paddle2onnx
!pip install onnx
!python PaddleDetection/tools/export_model.py -c config/yolo.yml \
    -o TestReader.inputs_def.image_shape=[1,3,608,608] \
    --output_dir inference_model
!paddle2onnx \
--model_dir inference_model/inference_model \
--model_filename model.pdmodel \
--params_filename model.pdiparams \
--save_file yolo.onnx \
--opset_version 11 \
--enable_onnx_checker True
PaddleX
The export steps follow the PaddleX deployment-model export tutorial.
!paddlex --export_inference --model_dir=./model/ --save_dir=./inference_model --fixed_input_shape=[1,3,608,608]
!paddle2onnx \
--model_dir inference_model/inference_model \
--model_filename model.pdmodel \
--params_filename model.pdiparams \
--save_file yolo.onnx \
--opset_version 11 \
--enable_onnx_checker True
Hand keypoint model
HandPose_x
The ONNX model can be exported with the paddle.onnx.export API.
import paddle
from resnet50 import resnet50

model_ = resnet50(num_classes=42, img_size=256)
model_.eval()  # switch to inference mode
model_path = '848resnet_50-model_epoch-9.pdparams'  # the trained weights
ckpd = paddle.load(model_path)
model_.set_state_dict(ckpd)
x_spec = paddle.static.InputSpec(shape=[1, 3, 256, 256], dtype='float32')
paddle.onnx.export(model_, 'posehand', input_spec=[x_spec])  # saves posehand.onnx
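Before wiring the ONNX file into OpenVINO, it is worth confirming the export runs at all. A sketch using onnxruntime (an extra dependency, not used elsewhere in this project):

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("posehand.onnx")
dummy = np.random.rand(1, 3, 256, 256).astype("float32")
input_name = sess.get_inputs()[0].name
out = sess.run(None, {input_name: dummy})
print(out[0].shape)  # expect (1, 42): x and y for each of the 21 keypoints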
Inference and Deployment
Object detection (OpenVINO)
from openvino.inference_engine import IECore
import cv2
import numpy as np

class OpenvinoHandDetectModel(object):
    def __init__(self, crop_size=[608, 608], k_top=2):
        self.model = OpenvinoHandDetectInference(crop_size=crop_size, k_top=k_top)
        self.crop_size = crop_size

    def predict(self, img_cv2, threshold):
        h, w, _ = img_cv2.shape
        output = self.model.forward(img_cv2)
        hands_list = []
        if len(output) > 0:
            if output[0][1] > threshold:
                for i in range(len(output)):
                    if output[i][1] > threshold:
                        # map the box from network input coordinates back to the frame
                        x1 = int(output[i][2] / self.crop_size[0] * w)
                        y1 = int(output[i][3] / self.crop_size[0] * h)
                        x2 = int(output[i][4] / self.crop_size[0] * w)
                        y2 = int(output[i][5] / self.crop_size[0] * h)
                        hands_list.append([img_cv2[y1:y2, x1:x2], x1, y1, x2, y2])
                        cv2.rectangle(img_cv2, (x1, y1), (x2, y2), [0, 0, 255], thickness=2)
        return hands_list

class OpenvinoHandDetectInference(object):
    def __init__(self, model_path="./onnx/model.onnx", crop_size=[608, 608], k_top=2, device="CPU"):
        ie = IECore()
        net = ie.read_network(model_path)
        net.reshape({'image': [1, 3, crop_size[0], crop_size[1]], 'im_shape': [1, 2], 'scale_factor': [1, 2]})
        self.exec_net = ie.load_network(net, device)
        self.crop_size = crop_size
        self.k_top = k_top

    def forward(self, src_img):
        test_image = handle(src_img, self.crop_size)
        test_im_shape = np.array([[608, 608]]).astype('float32')
        test_scale_factor = np.array([[1, 1]]).astype('float32')
        inputs_dict = {'image': test_image, "im_shape": test_im_shape,
                       "scale_factor": test_scale_factor}
        output = self.exec_net.infer(inputs_dict)
        # the first output blob holds the detections, one row per box:
        # [class_id, score, x1, y1, x2, y2]; keep the top k_top rows
        output_data = list(output.values())[0]
        return output_data[:self.k_top]

def normalize(src_img, mean, std):
    src_img = src_img.astype(np.float32, copy=False)
    mean = np.array(mean)[np.newaxis, np.newaxis, :]
    std = np.array(std)[np.newaxis, np.newaxis, :]
    src_img = src_img / 255.0
    src_img -= mean
    src_img /= std
    return src_img

def handle(src_img, crop_size):
    src_img = cv2.resize(src_img, (crop_size[0], crop_size[1]))
    src_img = normalize(src_img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    src_img = src_img.transpose([2, 0, 1])  # HWC -> CHW
    tensor_img = src_img[None, :].astype("float32")
    return tensor_img
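Standalone usage of the detector looks like this (a sketch; test/hand.jpg is a placeholder path, and predict draws the boxes into the image in place):

detector = OpenvinoHandDetectModel(crop_size=[608, 608])
img = cv2.imread("test/hand.jpg")
hands = detector.predict(img, threshold=0.5)
print(len(hands), "hand(s) found")
cv2.imwrite("out.jpg", img)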
Object detection (Paddle Inference)
import cv2
import numpy as np
import paddle.inference as inference

class HandDetectModel(object):
    def __init__(self, crop_size=[608, 608], k_top=2):
        self.model = HandDetectInference(crop_size=crop_size, k_top=k_top)
        self.crop_size = crop_size

    def predict(self, img_cv2, threshold):
        h, w, _ = img_cv2.shape
        output = self.model.forward(img_cv2)
        hands_list = []
        if len(output) > 0:
            if output[0][1] > threshold:
                for i in range(len(output)):
                    if output[i][1] > threshold:
                        x1 = int(output[i][2] / self.crop_size[0] * w)
                        y1 = int(output[i][3] / self.crop_size[0] * h)
                        x2 = int(output[i][4] / self.crop_size[0] * w)
                        y2 = int(output[i][5] / self.crop_size[0] * h)
                        hands_list.append([img_cv2[y1:y2, x1:x2], x1, y1, x2, y2])
                        cv2.rectangle(img_cv2, (x1, y1), (x2, y2), [0, 0, 255], thickness=2)
        return hands_list

class HandDetectInference(object):
    def __init__(self, model_path="./inference_model/model.pdmodel",
                 param_path="./inference_model/model.pdiparams",
                 crop_size=[512, 512], k_top=2):
        self.config = inference.Config(model_path, param_path)
        self.predictor = inference.create_predictor(self.config)
        self.crop_size = crop_size
        self.k_top = k_top

    def forward(self, src_img):
        # the exported detector expects three inputs: im_shape, image, scale_factor
        input_names = self.predictor.get_input_names()
        input_handle = self.predictor.get_input_handle(input_names[0])
        input_handle.copy_from_cpu(np.array([self.crop_size, ]).astype('float32'))
        input_handle = self.predictor.get_input_handle(input_names[1])
        input_handle.copy_from_cpu(handle(src_img, self.crop_size))
        input_handle = self.predictor.get_input_handle(input_names[2])
        input_handle.copy_from_cpu(np.array([[1, 1], ]).astype('float32'))
        output_names = self.predictor.get_output_names()
        output_handle = self.predictor.get_output_handle(output_names[0])
        self.predictor.run()
        output_data = output_handle.copy_to_cpu()  # rows of [class_id, score, x1, y1, x2, y2]
        return output_data[:self.k_top]

def normalize(src_img, mean, std):
    src_img = src_img.astype(np.float32, copy=False)
    mean = np.array(mean)[np.newaxis, np.newaxis, :]
    std = np.array(std)[np.newaxis, np.newaxis, :]
    src_img = src_img / 255.0
    src_img -= mean
    src_img /= std
    return src_img

def handle(src_img, crop_size):
    src_img = cv2.resize(src_img, (crop_size[0], crop_size[1]))
    src_img = normalize(src_img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    src_img = src_img.transpose([2, 0, 1])  # HWC -> CHW
    tensor_img = src_img[None, :].astype("float32")
    return tensor_img
Hand keypoint model
import cv2
import numpy as np
from paddle.inference import Config
from paddle.inference import create_predictor

class KeypointInferenceModel(object):
    def __init__(self):
        self.config = Config("model/__model__", "model/__params__")
        self.predictor = create_predictor(self.config)

    def forward(self, inpBlob):
        input_names = self.predictor.get_input_names()
        input_handle = self.predictor.get_input_handle(input_names[0])
        output_names = self.predictor.get_output_names()
        output_handle = self.predictor.get_output_handle(output_names[0])
        input_handle.copy_from_cpu(inpBlob)
        self.predictor.run()
        output_data = output_handle.copy_to_cpu()
        return output_data

class KeypointModel(object):
    def __init__(self):
        self.num_points = 21
        self.inHeight = 368
        self.threshold = 0.1
        # skeleton edges between the 21 hand keypoints
        self.point_pairs = [[0, 1], [1, 2], [2, 3], [3, 4],
                            [0, 5], [5, 6], [6, 7], [7, 8],
                            [0, 9], [9, 10], [10, 11], [11, 12],
                            [0, 13], [13, 14], [14, 15], [15, 16],
                            [0, 17], [17, 18], [18, 19], [19, 20]]
        self.model = KeypointInferenceModel()

    # run the model and decode the keypoints
    def predict(self, img_cv2):
        # preprocessing: fixed input height, width scaled to keep the aspect ratio
        img_height, img_width, _ = img_cv2.shape
        aspect_ratio = img_width / img_height
        inWidth = int(((aspect_ratio * self.inHeight) * 8) // 8)
        inpBlob = cv2.dnn.blobFromImage(img_cv2, 1.0 / 255, (inWidth, self.inHeight),
                                        (0, 0, 0), swapRB=False, crop=False)
        # inference
        output = self.model.forward(inpBlob)
        # decode one point per confidence map
        points = []
        for idx in range(self.num_points):
            probMap = output[0, idx, :, :]
            probMap = cv2.resize(probMap, (img_width, img_height))
            # find the global maximum of the confidence map
            minVal, prob, minLoc, point = cv2.minMaxLoc(probMap)
            if prob > self.threshold:
                points.append((int(point[0]), int(point[1])))
            else:
                points.append(None)
        return points

    # visualize the hand pose; returns the fingertip used for interaction
    def vis_pose(self, img_cv2, points, clas_hand):
        img_cv2_copy = np.copy(img_cv2)
        for idx in range(len(points)):
            if points[idx]:
                cv2.circle(img_cv2_copy, points[idx], 3, (0, 255, 255), thickness=-1,
                           lineType=cv2.FILLED)
                cv2.putText(img_cv2_copy, "{}".format(idx), points[idx], cv2.FONT_HERSHEY_SIMPLEX,
                            1, (0, 0, 255), 2, lineType=cv2.LINE_AA)
        # draw the skeleton
        for pair in self.point_pairs:
            partA = pair[0]
            partB = pair[1]
            if points[partA] and points[partB]:
                cv2.line(img_cv2, points[partA], points[partB], (0, 255, 255), 2)
                cv2.circle(img_cv2, points[partA], 3, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
                cv2.circle(img_cv2, points[partB], 3, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
        if clas_hand == "left" and points[20]:
            return points[20]
        elif clas_hand == "right" and points[8]:
            return points[8]
        else:
            return None

if __name__ == '__main__':
    pose_model = KeypointModel()
    frame = cv2.imread('test/left.jpg')
    res_points = pose_model.predict(frame)
    pose_model.vis_pose(frame, res_points, "left")
    cv2.imshow("video", frame)
    cv2.waitKey(0)
Interaction Strategy
① Detect the hands and crop out the region containing each hand;
② Feed the crop from ① into the keypoint model;
③ Since we already have detections, the hands can be tracked with simple IoU matching; if a hand stays in one region long enough, we take that as a request to interact;
④ Set the interaction flag to True; use the keypoints of both index fingertips to frame a region for classification or OCR (or, with a single index fingertip, start drawing; if the fingertip moves too fast, treat that as lifting the pen and stop drawing).
The extensions mentioned in ④ plug in at the clas_flag branch (the spot marked with a comment). A sketch of the compute_iou helper used for the tracking in ③ follows below.
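The compute_iou helper imported from util in the scripts below is not included in the original post; a minimal sketch for boxes in [x1, y1, x2, y2] form:

def compute_iou(box_a, box_b):
    # intersection rectangle
    ix1, iy1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    ix2, iy2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    union = area_a + area_b - inter
    return inter / union if union > 0 else 0.0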
OpenVINO version
from keypoint import KeypointModel
from detect import OpenvinoHandDetectModel
from util import compute_iou
from config import *
import cv2

handdetectmodel = OpenvinoHandDetectModel(crop_size=crop_size)
keypointmodel = KeypointModel()
video = cv2.VideoCapture(0)
ret, frame = video.read()
h, w, _ = frame.shape
while ret:
    hands_list = handdetectmodel.predict(frame, detect_threshold)
    if len(hands_list) == 1:
        # label the hand by its box centre: centre right of the midline means "right"
        if (hands_list[0][1] + hands_list[0][3]) > w:
            clas_hand = "right"
        else:
            clas_hand = "left"
        single_hand = hands_list[0][0]
        res_points = keypointmodel.predict(single_hand)
        point = keypointmodel.vis_pose(single_hand, res_points, clas_hand)
        frame[hands_list[0][2]:hands_list[0][4], hands_list[0][1]:hands_list[0][3]] = single_hand
    elif len(hands_list) == 2:
        # decide which detection is the left/right hand by comparing box centres
        if (hands_list[0][1] + hands_list[0][3]) > (hands_list[1][1] + hands_list[1][3]):
            clas_hand_1 = "right"
            clas_hand_2 = "left"
            new_box_l = [hands_list[0][1], hands_list[0][2], hands_list[0][3], hands_list[0][4]]
            new_box_r = [hands_list[1][1], hands_list[1][2], hands_list[1][3], hands_list[1][4]]
        else:
            clas_hand_1 = "left"
            clas_hand_2 = "right"
            new_box_r = [hands_list[0][1], hands_list[0][2], hands_list[0][3], hands_list[0][4]]
            new_box_l = [hands_list[1][1], hands_list[1][2], hands_list[1][3], hands_list[1][4]]
        # dwell detection: if both hands keep overlapping their previous boxes,
        # count the frames; enough frames raises the interaction flags
        if compute_iou(new_box_l, old_box_l) > iou_threshold and \
                compute_iou(new_box_r, old_box_r) > iou_threshold:
            iou_times += 1
            if iou_times > iou_times_threshold:
                iou_flag = True
            elif iou_times > clas_times_threshold:
                clas_flag = True
        else:
            iou_times = 0
            iou_flag = False
            iou_flag_times = 0
            clas_flag = False
        old_box_l = new_box_l
        old_box_r = new_box_r
        single_hand_1 = hands_list[0][0]
        res_points_1 = keypointmodel.predict(single_hand_1)
        point1 = keypointmodel.vis_pose(single_hand_1, res_points_1, clas_hand_1)
        frame[hands_list[0][2]:hands_list[0][4], hands_list[0][1]:hands_list[0][3]] = single_hand_1
        single_hand_2 = hands_list[1][0]
        res_points_2 = keypointmodel.predict(single_hand_2)
        point2 = keypointmodel.vis_pose(single_hand_2, res_points_2, clas_hand_2)
        frame[hands_list[1][2]:hands_list[1][4], hands_list[1][1]:hands_list[1][3]] = single_hand_2
        if iou_flag and not clas_flag:
            if point1 is not None and point2 is not None:
                if clas_hand_1 == "left":
                    # draw a growing arc on each index fingertip as dwell-time feedback
                    cv2.ellipse(frame, (point1[0] + hands_list[0][1], point1[1] + hands_list[0][2]),
                                (12, 12), 0, 0, int(min(8 * iou_flag_times, 360)), (255, 255, 0), thickness=2)
                    cv2.ellipse(frame, (point2[0] + hands_list[1][1], point2[1] + hands_list[1][2]),
                                (12, 12), 0, 0, int(min(8 * iou_flag_times, 360)), (255, 255, 0), thickness=2)
                    iou_flag_times += 1
        elif clas_flag:
            # extension point: classification / OCR / drawing goes here
            pass
        else:
            pass
    cv2.imshow("frame", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
    # grab the next frame
    ret, frame = video.read()
video.release()
cv2.destroyAllWindows()
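The loop pulls its thresholds and mutable state from config.py via from config import *. That file is not shown in the original either, so here is an assumed sketch; every value is illustrative, and the relative size of the two dwell thresholds decides which flag engages first:

# config.py -- assumed contents, all values illustrative
crop_size = [608, 608]
detect_threshold = 0.5       # detection score cutoff
iou_threshold = 0.5          # overlap needed to count as "staying put"
clas_times_threshold = 45    # dwell frames before classification/OCR engages
iou_times_threshold = 90     # dwell frames before the interaction flag engages
# mutable per-run state, updated inside the loop
old_box_l = [0, 0, 0, 0]
old_box_r = [0, 0, 0, 0]
iou_times = 0
iou_flag = False
iou_flag_times = 0
clas_flag = False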
Paddle Inference version
The Paddle Inference script is identical to the OpenVINO one above except for the detector import and construction; the per-frame loop is unchanged:

from keypoint import KeypointModel
from detect import HandDetectModel
from util import compute_iou
from config import *
import cv2

handdetectmodel = HandDetectModel(crop_size=crop_size)
keypointmodel = KeypointModel()
# the rest (video capture and the while-loop) is the same as in the OpenVINO version
About Me
My WeChat Official Account
I share my AI Studio work on my WeChat official account, which is also irregularly updated with deep-learning content: fun applications, paper readings and reproductions, and close readings of deep-learning books. The back-stories and design notes behind my public AI Studio projects are posted there too. Feel free to follow!
About the Author

| School | Harbin Institute of Technology (Shenzhen), third-year undergraduate |
|---|---|
| Interests (main account) | images and video, reinforcement learning, point clouds |
| Interests (alt account) | text and speech processing |
| Personal interests | I like fun things and open-source fun projects that are simple and beginner-friendly; feel free to fork them |
| Homepage | main account homepage; alt account homepage |
| Email | firewhitefox@qq.com |
| WeChat official account | Hello Neural Networks |