paddleocr 实操笔记（前向后梳理）

文章列表

要点：

参考：

基于PaddleOCR的数字显示器字符识别

工业仪表数值识别

前言

问题分析：要处理电表中的数据，可以分步骤进行，拆解为以下两个问题：

1. 感兴趣区域（ROI）定位问题
2. OCR读数问题

针对问题1，经过实验与探索，找到了以下三种方案：

方案1，直接利用PaddleOCR默认自带的检测器，筛选掉其他无效的框体和信息，剩下的就是有用的。(未经过训练的，直接使用预训练模型)

方案2，通过Opencv图像处理的方法，根据电表字符区域特征进行相应的轮廓提取和颜色筛选，从而保证其得到有效的定位。

方案3，收集场景下的大量电表字符识别数据，制作数据集并进行标记，分别训练其定位和识别模型。

考虑到时间成本和人工成本问题，这里优先选择前两种方案。下面是使用Opencv来进行ROI区域定位的方法。

一导包

# 导入依赖库
import os
from tqdm import tqdm
import cv2
import csv
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import warnings
from paddleocr import PaddleOCR, draw_ocr# 忽略警告
# Silence library warnings so they do not clutter the output.
warnings.filterwarnings("ignore")
# Original note: "prevents an error". This variable is the OpenMP override that
# lets the process continue when more than one OpenMP runtime gets loaded.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

最后一行设置环境变量 KMP_DUPLICATE_LIB_OK，用于防止因重复加载 OpenMP 运行库而报错。

二相关函数

2.1 plt画图

# 可视化绘图
def imshow_image(img_path):
    """Show the image stored at ``img_path`` in a 5x5-inch matplotlib figure.

    Args:
        img_path: Path of an image file that ``PIL.Image.open`` can read.
    """
    # The context manager releases the file handle as soon as the pixels have
    # been handed to matplotlib; the original left the file open.
    with Image.open(img_path) as img:
        plt.figure("test_img", figsize=(5, 5))
        plt.imshow(img)
    plt.show()

2.2 画出最大轮廓

def find_biggest_contour(image):
    """Return the contour with the largest area found in ``image``.

    Args:
        image: Mask handed to ``cv2.findContours`` (the caller in this file
            passes the output of ``cv2.inRange``). A copy is searched, so the
            caller's array is left untouched.

    Returns:
        The contour with the greatest ``cv2.contourArea`` (the first one on
        ties), or ``None`` when no contour is found.
    """
    # Taking the last two values keeps this working on OpenCV 3.x, which
    # returns (image, contours, hierarchy), as well as 4.x, which returns
    # (contours, hierarchy).
    contours, _hierarchy = cv2.findContours(
        image.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
    )[-2:]
    # default=None avoids the ValueError that max() raises on an empty
    # sequence; the caller already tests the result against None.
    return max(contours, key=cv2.contourArea, default=None)

2.3 查找ROI轮廓

def get_find_display(input_path, lower=(0, 0, 0), higher=(255, 255, 255), output_path='./'):
    """Locate the display ROI by HSV thresholding and save the cropped region.

    The image is blurred, converted to HSV and thresholded with
    ``cv2.inRange``; the mask is cleaned with a morphological close followed
    by an open, and the bounding rectangle of the largest contour is cropped.

    Args:
        input_path: Path of the image to load with ``cv2.imread``.
        lower: Lower (H, S, V) bound of the colour range to keep.
        higher: Upper (H, S, V) bound of the colour range to keep.
        output_path: Directory that receives the crop, saved under the input
            file's name. It is created when missing.

    Returns:
        ``(result_img, img_crop)``: the input image with everything outside
        the mask blacked out, and the crop around the largest contour. When
        the mask contains no contour, ``img_crop`` is the unmodified input
        image and nothing is written to disk.

    Raises:
        FileNotFoundError: If ``input_path`` cannot be read as an image.
    """
    img = cv2.imread(input_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"cannot read image: {input_path}")
    filename = os.path.basename(input_path)

    # Blur first so that sensor noise does not fragment the colour mask.
    blurred = cv2.GaussianBlur(img, (7, 7), 0)
    hsv = cv2.cvtColor(blurred, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsv, np.array(lower[:3]), np.array(higher[:3]))

    # Close small holes, then remove small specks.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)

    # Keep only the pixels of the original image that fall inside the mask.
    result_img = cv2.bitwise_and(img, img, mask=mask)

    try:
        biggest_contour = find_biggest_contour(mask)
    except ValueError:
        # Raised when the helper calls max() on an empty contour list.
        biggest_contour = None
    if biggest_contour is None:
        return result_img, img

    print('cnt_len:', len(biggest_contour))
    x, y, w, h = cv2.boundingRect(biggest_contour)
    print(x, y, w, h)
    img_crop = img[y:y + h, x:x + w]

    # os.path.join works whether or not output_path ends with a separator.
    save_path = os.path.join(output_path, filename)
    print('wpath:', save_path)
    # makedirs also creates missing parent directories (os.mkdir does not).
    os.makedirs(output_path, exist_ok=True)
    cv2.imwrite(save_path, img_crop)
    return result_img, img_crop

三直接使用OCR算法识别

3.1 提取目标识别区域（找出文字区域）

# Directory that receives the cropped ROI image.
output_path = 'work/roi/'
# Alternative sample image:
# img_roi = 'test/133102_steerPoint5_preset1255_20220917221726_v.jpeg'
img_roi = 'test/number_item.jpg'
# (H, S, V) bounds handed to get_find_display; the lower saturation bound of 80
# is the only restrictive value here.
lower = (0, 80, 0)
higher = (255, 255, 255)
result_img, img_crop = get_find_display(img_roi, lower, higher, output_path)

3.2 查看识别区域

# Path under which get_find_display saved the crop of number_item.jpg
# (output directory + original file name).
img_roi_path = 'work/roi/number_item.jpg'
imshow_image(img_roi_path)

3.3 直接识别

def rec_display_roi(img_roi):
    """Run PaddleOCR recognition only (``det=False``) on an image.

    Args:
        img_roi: Image argument forwarded unchanged to ``PaddleOCR.ocr``.

    Returns:
        The first two fields of the first result entry, as a tuple.
        Presumably (text, confidence); the exact nesting depends on the
        installed PaddleOCR version -- TODO confirm.
    """
    # A new PaddleOCR instance with default settings is built on every call.
    recognizer = PaddleOCR()
    recognized = recognizer.ocr(img_roi, det=False)
    first_entry = recognized[0]
    return first_entry[0], first_entry[1]


rec_display_roi(img_roi_path)

3.4 直接使用图片进行识别

获取识别区

# Directory that receives the cropped ROI image.
output_path = 'work/roi/'
img_roi = 'test/number_use.jpg'
# The bounds span 0-255 on every channel, so the colour mask rejects nothing.
lower = (0, 0, 0)
higher = (255, 255, 255)
result_img, img_crop = get_find_display(img_roi, lower, higher, output_path)
# Shows the source image (img_roi), not the saved crop.
imshow_image(img_roi)

进行识别

img_roi_path = "work/roi/number_use.jpg"
imshow_image(img_roi_path)


def rec_display_roi(img_roi):
    """Run PaddleOCR recognition only (``det=False``) with the GPU enabled.

    Args:
        img_roi: Image argument forwarded unchanged to ``PaddleOCR.ocr``.

    Returns:
        ``result[0][0]`` of the value returned by ``PaddleOCR.ocr``.
    """
    # Default-settings alternative: PaddleOCR()
    engine = PaddleOCR(use_gpu=True)
    return engine.ocr(img_roi, det=False)[0][0]


# NOTE(review): this passes img_roi (set in the previous cell), not the
# img_roi_path shown just above -- confirm which image is intended.
rec_display_roi(img_roi)

识别结果： ('0598', 0.7145649790763855)

四直接使用OCR进行检测和识别

即前文的方案1：直接使用PaddleOCR对图像中所有可能的文字区域进行检测和识别，再从中筛选出需要的结果。

4.1 OCR识别读数

# The language of the PaddleOCR models is selected with the `lang` argument,
# e.g. `ch`, `en`, `fr`, `german`, `korean`, `japan`.
ocr = PaddleOCR(use_angle_cls=True, lang="ch")  # need to run only once to download and load model into memory

# Run detection + recognition on one image and save an annotated copy.
# Alternative sample image:
# img_path = 'test/133102_steerPoint5_preset1255_20220917221726_v.jpeg'
img_path = 'test/number_item.jpg'
save_path = 'work/dst/result.jpg'
result = ocr.ocr(img_path, cls=True)
for line in result:
    print(line)

# Each entry is indexed as [box, (text, score)].
image = Image.open(img_path).convert('RGB')
boxes = [line[0] for line in result]
txts = [line[1][0] for line in result]
scores = [line[1][1] for line in result]
im_show = draw_ocr(image, boxes, txts, scores, font_path='work/font/simfang.ttf')
im_show = Image.fromarray(im_show)
# Create the output directory first; saving into a missing directory fails.
os.makedirs(os.path.dirname(save_path), exist_ok=True)
im_show.save(save_path)

"""[[[1936.0, 56.0], [2461.0, 56.0], [2461.0, 109.0], [1936.0, 109.0]], ('2022-11-28 07:38:28', 0.8835511)]
[[[2461.0, 450.0], [2557.0, 450.0], [2557.0, 500.0], [2461.0, 500.0]], ('原水', 0.99717796)]
[[[12.0, 1310.0], [483.0, 1322.0], [481.0, 1404.0], [10.0, 1392.0]], ('水泵房仪表间', 0.93879247)]
im_show <PIL.Image.Image image mode=RGB size=1200x338 at 0x2852F99EB00>"""

五算法优化

def write_to_csv(log_path, filename='', result=0.00, score=0, mode_head=True):
    """Append one row to the CSV log at ``log_path``.

    Args:
        log_path: CSV file to append to; the file and its parent directory
            are created when missing.
        filename: First column of a data row.
        result: Second column of a data row.
        score: Third column of a data row.
        mode_head: When true, the header row ``filename, result, score`` is
            written and the three value arguments are ignored.
    """
    log_dir = os.path.dirname(log_path)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)
    # The context manager closes the file even if writing raises.
    with open(log_path, 'a+', encoding='utf-8', newline='') as file:
        csv_writer = csv.writer(file)
        if mode_head:
            csv_writer.writerow(['filename', 'result', 'score'])
        else:
            csv_writer.writerow([filename, result, score])


def get_bbox_area(box):
    """Return an area estimate for a four-point box.

    NOTE(review): every ``max(box[i])`` picks the larger of that point's
    coordinates, so the value is not width * height in general. The function
    is not called by the code in view (``bboxes_choose`` uses ``quadArea``);
    behaviour is left unchanged -- confirm the intent before relying on it.
    """
    return (max(box[2]) - max(box[0])) * (max(box[3]) - max(box[1]))


def quadArea(nodes):
    """Return the absolute cross-product sum of a polygon's vertices.

    The vertices must be listed in order (clockwise or counter-clockwise).
    The sum is not halved, so the value is twice the polygon area; that is
    sufficient for ranking boxes by size, which is its only use here.

    Args:
        nodes: Sequence of points, each indexable as ``point[0]``, ``point[1]``.
    """
    count = len(nodes)
    total = 0
    for i, node in enumerate(nodes):
        following = nodes[(i + 1) % count]
        total += node[0] * following[1] - following[0] * node[1]
    return abs(total)


def bboxes_choose(boxes, txts, scores):
    """Keep only the detection whose box has the largest ``quadArea``.

    Args:
        boxes: Detected boxes, each a sequence of points.
        txts: Recognised texts, parallel to ``boxes``.
        scores: Confidence values, parallel to ``boxes``.

    Returns:
        Three single-element lists ``([box], [text], [score])`` for the
        largest box (the first one on ties), or three empty lists when
        ``boxes`` is empty.
    """
    # len() rather than truthiness so that array-like inputs also work.
    if len(boxes) == 0:
        return [], [], []
    areas = [quadArea(box) for box in boxes]
    index = areas.index(max(areas))
    return [boxes[index]], [txts[index]], [scores[index]]


def ocr_roi_det(img_path, font, save_path='./work/save/'):
    """Run OCR on one image and return the text of the largest detected box.

    Uses the module-level ``ocr`` instance. When something is detected, an
    annotated copy of the image is saved as ``<name>_result.jpg`` inside
    ``save_path`` (created when missing).

    Args:
        img_path: Path of the image to process.
        font: Font file passed to ``draw_ocr`` for rendering the text.
        save_path: Directory for the annotated image.

    Returns:
        ``(text, score)`` of the largest box, or ``('', -1)`` when nothing was
        detected; -1 is the same "no result" score ``all_test_det`` records.
    """
    from PIL import Image

    # Treat a None result like an empty one (presumably possible when nothing
    # is detected -- depends on the PaddleOCR version).
    result = ocr.ocr(img_path, cls=True) or []
    # Each entry is indexed as [box, (text, score)], e.g.
    # [[[151.0, 53.0], [277.0, 53.0], [277.0, 111.0], [151.0, 111.0]], ('00.2', 0.94)]
    boxes = [line[0] for line in result]
    txts = [line[1][0] for line in result]
    scores = [line[1][1] for line in result]
    boxes, txts, scores = bboxes_choose(boxes, txts, scores)
    if not txts:
        # Previously txts[0] raised IndexError here, so the empty-result
        # branch in all_test_det could never run.
        return '', -1

    image = Image.open(img_path).convert('RGB')
    fname = os.path.basename(img_path).split('.')[0]
    im_show = Image.fromarray(draw_ocr(image, boxes, txts, scores, font_path=font))
    # makedirs also creates missing parent directories (os.mkdir does not).
    os.makedirs(save_path, exist_ok=True)
    im_show.save(os.path.join(save_path, fname + '_result.jpg'))
    return txts[0], scores[0]


def all_test_det(path, log_path, font, save_path):
    """Run ``ocr_roi_det`` on every file below ``path`` and log the results.

    A header row is appended to the CSV first, then one row per image that
    yields a non-empty text. Images without a result are recorded with score
    -1 in the printed summary only.

    Args:
        path: Root directory that is walked recursively for input files.
        log_path: CSV file receiving the results.
        font: Font file forwarded to ``ocr_roi_det``.
        save_path: Directory for the annotated images.
    """
    # os.path.join keeps the separator for files found in sub-directories.
    img_list = [
        os.path.join(dirpath, filename)
        for dirpath, _dirnames, filenames in os.walk(path)
        for filename in filenames
    ]

    write_to_csv(log_path)
    count = 0
    img_ans_dic = {}
    # Iterate over every image; the original range(len - 1) skipped the last.
    for img_roi in tqdm(img_list):
        fname = os.path.basename(img_roi).split('.')[0]
        result, score = ocr_roi_det(img_roi, font, save_path)
        print('result:', result, 'score:', score)
        if result == '':
            img_ans_dic[fname] = -1
            continue
        img_ans_dic[fname] = score
        count += 1
        write_to_csv(log_path, fname, result, score, False)

    print('count:', count)
    print('dict_len:', len(img_ans_dic))
    print('ans_dict:', img_ans_dic)

if __name__ == '__main__':
    # Input and output locations used by this run.
    # img_path = 'test/number_use.jpg'
    font_path = 'work/font/simfang.ttf'
    log_path = 'work/log/result.csv'
    save_path = 'work/save_result/'
    test_path = 'work/dataset/test/'

    # The language of the PaddleOCR models is selected with the `lang`
    # argument, e.g. `ch`, `en`, `fr`, `german`, `korean`, `japan`.
    print('查看ocr数据模型')
    # Rebinds the module-level `ocr` that ocr_roi_det reads.
    ocr = PaddleOCR(use_angle_cls=False, lang="en")  # need to run only once to download and load model into memory
    print('ocr:', ocr)

    # img_path is the module-level value assigned earlier in the file, because
    # the local assignment above is commented out.
    a, b = ocr_roi_det(img_path, font_path)
    print('查看识别结果：', a, b)

    all_test_det(test_path, log_path, font_path, save_path)

    # Result inspection; alternative images:
    # rs_img = 'work/save_result/133102_steerPoint5_preset1255_20220917221726_v_result.jpg'
    # rs_img = 'test/72635_steerPoint12_preset1294_20220919123447_v.jpeg'
    rs_img = 'test/number_use.jpg'
    imshow_image(rs_img)
    print('执行到最后位置》')

优化后的算法

paddleocr 实操笔记（前向后梳理）

前言

一导包

二相关函数

2.1 plt画图

2.2 画出最大轮廓

2.3 查找ROI轮廓

三直接使用OCR算法识别

3.1 提取目标识别区域（找出文字区域）

3.2 查看识别区域

3.3 直接识别

3.4 直接使用图片进行识别

四直接使用OCR进行检测和识别

4.1 OCR识别读数

五算法优化

公告

标签

paddleocr 实操笔记 （前向后梳理）

前言

一 导包

二 相关函数

2.1 plt画图

2.2 画出最大轮廓

2.3 查找ROI轮廓

三 直接使用OCR算法识别

3.1 提取目标识别区域（找出文字区域）

3.2 查看识别区域

3.3 直接识别

3.4 直接使用图片进行识别

四 直接使用OCR进行检测和识别

4.1 OCR识别读数

五 算法优化

相关问题

公告

标签

paddleocr 实操笔记（前向后梳理）

一导包

二相关函数

三直接使用OCR算法识别

四直接使用OCR进行检测和识别

五算法优化