转自AI Studio,原文链接:PaddleOCR课表图片一键转电子版 - 飞桨AI Studio





2.1 小朋友的课表



2.2 大朋友的课表





  • 切分表格单元格子
  • 每个单元格文字识别

那就开干!


4.1 引入必要的库

In [ ]

# clone PaddleOCR代码
! git  clone https://gitee.com/PaddlePaddle/PaddleOCR --depth=1
Cloning into 'PaddleOCR'...
remote: Enumerating objects: 1237, done.
remote: Counting objects: 100% (1237/1237), done.
remote: Compressing objects: 100% (1108/1108), done.
remote: Total 1237 (delta 204), reused 699 (delta 79), pack-reused 0
Receiving objects: 100% (1237/1237), 101.41 MiB | 7.16 MiB/s, done.
Resolving deltas: 100% (204/204), done.
Checking connectivity... done.

In [ ]

%cd ~
!pip install -U pip --user >log.log
!pip install -r PaddleOCR/requirements.txt  >log.log
!pip install shapely >log.log
!pip install -e PaddleOCR >log.log

In [1]

%cd ~/PaddleOCR/
import cv2
import numpy as np
from paddleocr import PaddleOCR
import openpyxl

4.2 切分表格


cv2.imshow("二值化图片:", binary)  # 展示图片


In [2]

def _cluster_ends(coords, min_gap=10):
    """Return the last value of every run of nearby coordinates.

    The coordinates are sorted; whenever two consecutive values differ by
    more than ``min_gap`` the smaller one closes a run and is kept.  The
    largest value is always kept.  An empty input yields an empty list.
    """
    ordered = np.sort(np.asarray(coords))
    if ordered.size == 0:
        return []
    # Index k is a run end when ordered[k + 1] - ordered[k] exceeds the gap.
    run_ends = np.flatnonzero(np.diff(ordered) > min_gap)
    return [*ordered[run_ends], ordered[-1]]


def seg_pic(img, min_gap=10):
    """Locate the grid lines of a table image.

    Args:
        img: Path of the image file, passed to ``cv2.imread``.
        min_gap: Minimum pixel jump between two sorted intersection
            coordinates for them to count as different grid lines.  The
            right value depends on the picture (roughly below the cell
            height / width); the default keeps the original threshold of 10.

    Returns:
        A tuple ``(image, mylistx, mylisty)``: the colour image as loaded,
        the x coordinates of the vertical grid lines and the y coordinates
        of the horizontal grid lines, each in ascending order.  Both lists
        are empty when no line intersection is found.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot load ``img``.
    """
    image = cv2.imread(img, 1)
    if image is None:
        # imread signals failure by returning None instead of raising.
        raise FileNotFoundError("cannot read image: {}".format(img))

    # Grayscale, then binarise the inverted image.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    binary = cv2.adaptiveThreshold(~gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 35, -5)

    rows, cols = binary.shape

    # Horizontal lines: erode then dilate with a wide, 1-pixel-high kernel.
    scale = 40
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (cols // scale, 1))
    eroded = cv2.erode(binary, kernel, iterations=1)
    dilatedcol = cv2.dilate(eroded, kernel, iterations=1)

    # Vertical lines: same operation with a tall, 1-pixel-wide kernel.
    scale = 20
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, rows // scale))
    eroded = cv2.erode(binary, kernel, iterations=1)
    dilatedrow = cv2.dilate(eroded, kernel, iterations=1)

    # Pixels present in both line images are the grid intersections.
    bitwiseAnd = cv2.bitwise_and(dilatedcol, dilatedrow)

    # Coordinates of every non-zero intersection pixel.
    ys, xs = np.where(bitwiseAnd > 0)

    # Each intersection covers several neighbouring pixels, so keep only the
    # last coordinate of every run of close values.
    mylistx = _cluster_ends(xs, min_gap)  # x coordinates
    mylisty = _cluster_ends(ys, min_gap)  # y coordinates
    return image, mylistx, mylisty


In [3]

def course_ocr(image, mylistx, mylisty):
    """Run OCR on every table cell and return the text as rows of strings.

    Args:
        image: Table image as an array indexed ``[y, x]``.
        mylistx: Ascending x coordinates of the vertical grid lines.
        mylisty: Ascending y coordinates of the horizontal grid lines.

    Returns:
        A list with one entry per table row; each entry is a list with one
        string per cell.  A cell's string starts with a single space and
        every recognised text line follows, each preceded by a newline.
        Cells with no recognised text contain just the single space.
    """
    ocr = PaddleOCR(det=True)
    mylist = []
    # Consecutive grid lines bound one row / one column of cells.
    for top, bottom in zip(mylisty, mylisty[1:]):
        row = []
        for left, right in zip(mylistx, mylistx[1:]):
            # Shrink the crop by 3 pixels so the grid lines stay out of it.
            roi = image[top + 3:bottom - 3, left:right - 3]
            txt = ' '
            if roi.size:  # cells thinner than the margins give an empty crop
                result = ocr.ocr(roi, det=True)
                # Assumes each result entry ends with a (text, score) pair,
                # as the unpacking below requires -- TODO confirm against the
                # installed PaddleOCR version.
                for line in result or []:
                    text, _ = line[-1]
                    txt = txt + '\n' + text
            row.append(txt)
        mylist.append(row)

    return mylist



In [5]

def writeToExcel(file_path, new_list):
    """Write a table of strings to a new workbook and save it.

    Args:
        file_path: Destination path handed to ``Workbook.save``.
        new_list: Rows of cell values; the first row's length is used as the
            number of columns for every row.

    Returns:
        Always ``1``, after the file has been saved and a confirmation line
        has been printed.
    """
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet.title = '我的课程表'
    for row_no, row_values in enumerate(new_list, start=1):
        # Worksheet rows and columns are 1-based, hence the start offsets.
        for col_no in range(1, len(new_list[0]) + 1):
            value = row_values[col_no - 1]
            sheet.cell(row_no, col_no).value = value
            # Wrap text so multi-line cell content stays readable.
            sheet.cell(row_no, col_no).alignment = openpyxl.styles.Alignment(wrapText=True)
    workbook.save(file_path)  # nothing reaches disk until save() is called
    print("成功写入文件: " + file_path + " !")
    return 1

if __name__ == '__main__':
    # Run the whole pipeline on the sample picture: locate the grid lines,
    # OCR every cell, then write the recognised text to a spreadsheet.
    img = '../1.jpg'
    image, mylistx, mylisty = seg_pic(img)
    mylist = course_ocr(image, mylistx, mylisty)
    # NOTE(review): writeToExcel saves through openpyxl, which presumably
    # emits the .xlsx format regardless of this ".xls" name -- confirm the
    # extension before relying on the file opening cleanly in Excel.
    writeToExcel('../mycourse.xls', mylist)
成功写入文件: ../mycourse.xls !




  • 下一步,可以加入自动识别表格方向,并利用小程序、android app来部署,提供更好的体验。
  • 完整的代码已放目录下了。
  • 大家可以试试自己的课表,看识别效果如何?
  • 欢迎提各种好建议、好思路、好点子,晚安!!!


