【2021飞桨黑客松】数字华容道

训练一个自己做3*3数字华容道的模型

AI Studio

527人浏览 · 2021-12-26 18:33:55

AI Studio · 2021-12-26 18:33:55 发布

数字华容道

数字华容道是用尽量少的步数，尽量短的时间，将棋盘上的数字方块，按照从左到右、从上到下的顺序重新排列整齐。简单归类就是限制移动的空间排序。

很早之前就想训练一个可以自己打数字华容道游戏的模型了！！使用过强化学习，但发现效果并不理想（见不收敛的强化学习项目——数字华容道），所以现在用这种方式看看效果如何！

先看一下效果

数据集

生成的3*3尺寸的数字华容道数据集，将数字9作为可移动的格，数据集格式如下：

deep:0
1 2 3 
4 5 6 
7 8 9 
action:-842150451

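deep表示深度（层数），中间三行为棋盘上的数字，以空格分开，action表示应该向哪个方向移动，对应关系为 action_dict={0:down,1:up,2:left,3:right}。deep:0 是已经还原好的初始状态，没有对应的动作，示例中的 action 值 -842150451（即 0xCDCDCDCD）应该只是未初始化的内存值，所以后面读取数据时会跳过这第一条记录，实际用于训练的是181439条。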
数据集使用C语言穷举生成，共181440条（=9!/2），因为华容道的全部排列中有一半无法还原到最终状态。数据集生成文件为CreateDate.cpp，在Windows中可运行。

为什么不生成4*4的数据集呢？

其实我在生成，已经运行三天了，它卡在了第20层，在20层里不同的排序就有160多万，如果根据最终总数=16!/2，那么我需要生成10,461,394,944,000即10万亿多种排列方式，我根本不知道将会消耗多久才能跑完。

deep=1, len=2 totallen=2
deep=2, len=4 totallen=6
deep=3, len=10 totallen=16
deep=4, len=24 totallen=40
deep=5, len=54 totallen=94
deep=6, len=107 totallen=201
deep=7, len=212 totallen=413
deep=8, len=446 totallen=859
deep=9, len=946 totallen=1805
deep=10, len=1948 totallen=3753
deep=11, len=3938 totallen=7691
deep=12, len=7808 totallen=15499
deep=13, len=15544 totallen=31043
deep=14, len=30821 totallen=61864
deep=15, len=60842 totallen=122706
deep=16, len=119000 totallen=241706
deep=17, len=231844 totallen=473550
deep=18, len=447342 totallen=920892
deep=19, len=859744 totallen=1780636
deep=20, len=1637383 totallen=3418019

使用随机森林对华容道数据进行分类

分类要保证准确率达到1.0（这里指在训练集上的得分），将n_estimators设置为80以上即可。

# Parse the generated dataset. Each record consists of a "deep:" line, three
# board rows and an "action:" line.
with open("data/data118590/file.txt", "r", encoding="utf-8") as fp:
    lines = fp.readlines()
    train_data = []
    train_label = []
    # Parsing starts at line index 5, i.e. after the first five lines of the file.
    i = 5
    while i < len(lines):
        line = lines[i]
        if line.startswith("deep"):
            # Depth lines are not used for training.
            i += 1
        elif line.startswith("action"):
            # The label is the integer after the last ':'.
            train_label.append(int(line.strip().split(":")[-1]))
            i += 1
        else:
            # Three consecutive board rows are joined into one flat 9-value sample.
            rows = [lines[i + k].strip('\n') for k in range(3)]
            train_data.append([int(d) for d in "".join(rows).strip().split(" ")])
            i += 3
print(len(train_data))
print(len(train_label))

181439
181439

import joblib
from sklearn.ensemble import RandomForestClassifier

# n_estimators=70 scored 0.9999889770115576 and 80 scored 1.0 on the training
# data, so 80 is used.
rf_clf = RandomForestClassifier(random_state=0, n_estimators=80).fit(train_data, train_label)
# Accuracy measured on the same samples the forest was fitted on.
score_train = rf_clf.score(train_data, train_label)
print(score_train)
# Persist the fitted model so the automation script can load it.
joblib.dump(rf_clf, 'rf_clf.joblib')

1.0





['rf_clf.joblib']

在QQ小程序中验证一下

这段代码使用网易的airtest运行（Windows环境）。这里偷懒直接用了现成的ocr模型，虽然识别效果不太好，但只要最开始识别出的棋盘里没有多个9（没有识别到数字的格子会被当作9），后面就可以顺利运行，哈哈。更稳妥的做法是自行训练一个数字分类网络，保证每次输入到分类器中的数据是准确的。airtest其实还是比较好用的！！

# -*- encoding=utf8 -*-
__author__ = "漫舞枪神"

from airtest.core.api import *
import os
import cv2
import numpy as np
import paddlehub as hub
import joblib
from airtest.cli.parser import cli_setup

# Presumably cli_setup() is falsy when the script is not launched through the
# Airtest CLI; in that case the device connection and logging are set up here.
# NOTE(review): replace the placeholder device URI with a real one.
if not cli_setup():
    auto_setup(__file__, logdir=True, devices=["你的设备?cap_method=MINICAP&&ori_method=MINICAPORI&&touch_method=MAXTOUCH",])
    
# Expose GPU 0 to the process; recognize_text is later called with use_gpu=True.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

print("start...")
w,h=device().get_current_resolution()# query the phone's screen resolution
print(w,h)

# PaddleHub OCR module used to read the digits from the screenshots.
ocr = hub.Module(name="chinese_ocr_db_crnn_mobile")
# NOTE(review): second auto_setup call, without devices/logdir — confirm it is needed.
auto_setup(__file__)

# Board dimensions.
map_size = (3, 3)
# Pixel offset added to a cell's top-left corner when tapping it.
interval = 150
# Number of cells; also the value that represents the blank tile.
groundnum = map_size[0] * map_size[1]
# Two corner points (x, y) of the board on screen, in pixels.
boundary = [(30, 550), (1100, 1620)]
# Tile labels "1".."groundnum" as strings, used to filter the OCR output.
digit = [str(n) for n in range(1, groundnum + 1)]

def check_result(maps,map_size,digit):
    """Return True when the board is in solved (row-major) order.

    Args:
        maps: 2-D array indexed as maps[row, column], with map_size[0] rows
            and map_size[1] columns.
        map_size: (rows, columns) of the board.
        digit: tile labels in solved order, as strings convertible to int.

    Returns:
        True if every cell equals the label at its row-major position,
        otherwise False.
    """
    columns = map_size[1]
    for i in range(map_size[0]):
        for j in range(columns):
            # Row-major position: each completed row contributes `columns`
            # labels (the row count is the wrong stride on non-square boards).
            if maps[i,j]!=int(digit[i*columns+j]):
                return False
    return True

def check_map(maps):
    """Return whether maps contains every value 1..groundnum.

    Used to decide whether an OCR reading of the board can be trusted: a
    reading with a missing value (for example two cells read as the blank)
    is rejected.

    Args:
        maps: array of tile values, each expected to lie in 1..groundnum.

    Returns:
        A truthy NumPy boolean when every value 1..groundnum occurs in maps.
    """
    # The np.bool alias was removed in NumPy 1.24; the builtin bool is the
    # supported dtype spelling.
    tamp = np.zeros(groundnum, dtype=bool)
    for value in np.asarray(maps).ravel():
        # Mark the slot for this tile value as seen.
        tamp[value - 1] = True
    return tamp.all()

def CreateMap(results, digit):
    """Build the board array from one OCR result.

    Every cell starts as groundnum (the blank). Each recognised text that is
    one of the labels in `digit` is written into the cell that contains the
    centre of its text box.

    Args:
        results: OCR output; results[0]['data'] is a list of dicts with a
            'text' string and a 'text_box_position' list of (x, y) points.
        digit: accepted tile labels as strings.

    Returns:
        Integer array of shape map_size holding the recognised board.
    """
    maps=np.ones(map_size,dtype=int)*groundnum
    # Pixel size of one cell along x and y.
    cell_w=(boundary[1][0]-boundary[0][0])//map_size[0]
    cell_h=(boundary[1][1]-boundary[0][1])//map_size[1]
    for data in results[0]['data']:
        if data['text'] not in digit:
            continue
        box=data['text_box_position']
        # Centre of the box from points 0 and 2 (presumably opposite corners).
        center_x=(box[0][0]+box[2][0])//2
        center_y=(box[0][1]+box[2][1])//2
        col=(center_x-boundary[0][0])//cell_w
        row=(center_y-boundary[0][1])//cell_h
        # Ignore digits found outside the board (other on-screen text):
        # a negative index would wrap around and overwrite a real cell, and a
        # too-large one would raise IndexError.
        if 0<=row<maps.shape[0] and 0<=col<maps.shape[1]:
            maps[row][col]=int(data['text'])
    return maps

# Negative (inverted) image helper
def imcomplement(img):
    """Return the negative of img by mapping every value v to 255 - v."""
    # 256-entry lookup table holding 255, 254, ..., 0.
    inverse_lut = np.arange(255, -1, -1).astype("uint8")
    # Apply the table with OpenCV's lookup-table function.
    return cv2.LUT(img, inverse_lut)

def swap(maps,point1,point2):
    """Exchange two cells of maps in place and return maps.

    point1 and point2 are (column, row) pairs; cells are addressed as
    maps[row][column].
    """
    col_a, row_a = point1[0], point1[1]
    col_b, row_b = point2[0], point2[1]
    maps[row_a][col_a], maps[row_b][col_b] = maps[row_b][col_b], maps[row_a][col_a]
    return maps
# Load the trained classifier once; it does not change between moves, so there
# is no need to re-read it from disk on every iteration.
rf_clf = joblib.load("rf_clf.joblib")


def findgound(maps,map_size):
    """Return [column, row] of the blank tile (the cell equal to groundnum).

    Returns None when no cell of maps holds groundnum.
    """
    for i in range(map_size[0]):
        for j in range(map_size[1]):
            if maps[i,j]==groundnum:
                ground=[j,i]
                print(ground)
                return ground
    return None


def _touch_cell(column,row):
    """Tap the board cell at (column, row), `interval` pixels in from its top-left corner."""
    cell_w=(boundary[1][0]-boundary[0][0])//map_size[0]
    cell_h=(boundary[1][1]-boundary[0][1])//map_size[1]
    touch((boundary[0][0]+interval+cell_w*column,boundary[0][1]+interval+cell_h*row))


# Each move taps the tile next to the blank (module-level `ground`, refreshed
# every iteration of the loop below) and mirrors the move in the tracked board.
def up(maps):
    """Tap the tile above the blank and swap it with the blank in maps."""
    _touch_cell(ground[0],ground[1]-1)
    return swap(maps, ground,(ground[0],ground[1]-1))


def down(maps):
    """Tap the tile below the blank and swap it with the blank in maps."""
    _touch_cell(ground[0],ground[1]+1)
    return swap(maps, ground,(ground[0],ground[1]+1))


def left(maps):
    """Tap the tile left of the blank and swap it with the blank in maps."""
    _touch_cell(ground[0]-1,ground[1])
    return swap(maps, ground,(ground[0]-1,ground[1]))


def right(maps):
    """Tap the tile right of the blank and swap it with the blank in maps."""
    _touch_cell(ground[0]+1,ground[1])
    return swap(maps, ground,(ground[0]+1,ground[1]))


# Classifier label -> move function.
action_dict={0:down,1:up,2:left,3:right}

# Forces the first OCR reading to be accepted even if check_map rejects it.
restart = True
while True:
    snapshot(filename='pic.jpg')

    # OCR runs on the colour-inverted screenshot.
    np_images =[imcomplement(cv2.imread('log/pic.jpg'))] 
    results = ocr.recognize_text(
                        images=np_images,         # image data, ndarray.shape is [H, W, C], BGR format
                        use_gpu=True,            # whether to use the GPU; set CUDA_VISIBLE_DEVICES first if so
                        output_dir='ocr_result',  # directory where result images are saved
                        visualization=True,       # whether to save the recognition result as an image file
                        box_thresh=0.5,           # confidence threshold for detected text boxes
                        text_thresh=0.5)          # confidence threshold for recognised text
    #print(results)
    new_maps=CreateMap(results,digit)
    # Trust the new reading only when it is a complete board; otherwise keep
    # the internally tracked board that was updated by the previous move.
    if check_map(new_maps)  or restart:
        maps = new_maps
        restart = False
    print(maps)
    action = rf_clf.predict([maps.flatten()])[0]
    print(action)
    ground = findgound(maps,map_size)

    maps = action_dict[action](maps)
    # Stop as soon as the tracked board is solved.
    if check_result(maps, map_size,digit):
        break
    # Wait before taking the next screenshot.
    sleep(2.0)

请点击此处查看本环境基本用法.

Please click here for more detailed instructions.