PaddleOCR 实操笔记(从前向后梳理)
要点:
参考:
基于PaddleOCR的数字显示器字符识别
工业仪表数值识别
前言
问题分析:要处理电表中的数据,可以分为两个步骤,拆解为以下两个问题:
- 感兴趣区域定位问题
- OCR读数问题
针对问题1,经过实验与探索,找到以下三种方案:
方案1,直接利用PaddleOCR默认自带的检测器,筛选掉其他无效的框体和信息,剩下的就是有用的。(未经过训练的,直接使用预训练模型)
方案2,通过Opencv图像处理的方法,根据电表字符区域特征进行相应的轮廓提取和颜色筛选,从而保证其得到有效的定位。
方案3,收集场景下的大量电表字符识别数据,制作数据集并进行标记,分别训练其定位和识别模型。
考虑到时间成本和人工成本问题,这里优先选择前两种方案。 下面是使用Opencv来进行ROI区域定位的方法。
一 导包
# 导入依赖库
import os
from tqdm import tqdm
import cv2
import csv
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import warnings
from paddleocr import PaddleOCR, draw_ocr# 忽略警告
# Silence every Python warning for the rest of the session.
warnings.filterwarnings("ignore")
# Work-around kept from the original notes ("prevents an error"). The variable
# is read by the Intel OpenMP runtime; presumably it avoids the abort raised
# when several libraries each bundle their own copy of that runtime.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
最后一行设置环境变量 KMP_DUPLICATE_LIB_OK,用于防止运行时因 OpenMP 库重复加载而报错。
二 相关函数
2.1 plt画图
def imshow_image(img_path):
    """Display the image stored at *img_path* with matplotlib.

    The image is opened with PIL and drawn in a 5x5-inch figure named
    "test_img"; ``plt.show()`` is then called to render it.
    """
    picture = Image.open(img_path)
    plt.figure("test_img", figsize=(5, 5))
    plt.imshow(picture)
    plt.show()
2.2 获取最大轮廓
def find_biggest_contour(image):
    """Return the contour with the largest area found in *image*.

    Args:
        image: Image handed to ``cv2.findContours`` (the caller in this file
            passes the binary mask produced by ``cv2.inRange``).

    Returns:
        The contour for which ``cv2.contourArea`` is largest, or ``None``
        when ``cv2.findContours`` reports no contour at all.
    """
    # Search on a copy so the caller's array is never touched.
    found = cv2.findContours(image.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    # OpenCV 4.x returns (contours, hierarchy) while 3.x returns
    # (image, contours, hierarchy); the contours are second-to-last in both.
    contours = found[-2]
    if len(contours) == 0:
        # max() on an empty sequence would raise ValueError; report "nothing
        # found" instead so callers can test the result against None.
        return None
    return max(contours, key=cv2.contourArea)
2.3 查找ROI轮廓
def get_find_display(input_path, lower=(0, 0, 0), higher=(255, 255, 255), output_path='./'):
    """Locate the region of interest by HSV colour filtering and crop it.

    The image is blurred, converted to HSV and thresholded with
    ``cv2.inRange(hsv, lower, higher)``. The mask is cleaned with a
    morphological close followed by an open (7x7 elliptical kernel), and the
    bounding rectangle of the largest contour in the mask is cropped from the
    original image and written to ``output_path`` under the input file name.

    Args:
        input_path: Path of the image to read.
        lower: Lower (H, S, V) bound of the colour range.
        higher: Upper (H, S, V) bound of the colour range.
        output_path: Directory that receives the cropped image; it is created
            when missing.

    Returns:
        ``(result_img, img_crop)``: the input image with everything outside
        the mask blacked out, and the cropped region. When the mask contains
        no contour, ``img_crop`` is the whole input image and nothing is saved.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read ``input_path``.
    """
    img = cv2.imread(input_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f'cannot read image: {input_path}')
    filename = os.path.basename(input_path)

    # Blur first so the colour threshold is less sensitive to pixel noise.
    blurred = cv2.GaussianBlur(img, (7, 7), 0)
    hsv = cv2.cvtColor(blurred, cv2.COLOR_BGR2HSV)

    color_low = np.array([lower[0], lower[1], lower[2]])
    color_high = np.array([higher[0], higher[1], higher[2]])
    mask = cv2.inRange(hsv, color_low, color_high)

    # Close then open with the same kernel to tidy the mask.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)

    try:
        biggest_contour = find_biggest_contour(mask)
    except ValueError:
        # Raised by max() on an empty sequence when the mask has no contour.
        biggest_contour = None

    # Keep only the pixels of the original image that lie inside the mask.
    result_img = cv2.bitwise_and(img, img, mask=mask)

    if biggest_contour is None:
        return result_img, img

    print('cnt_len:', len(biggest_contour))
    x, y, w, h = cv2.boundingRect(biggest_contour)
    print(x, y, w, h)
    img_crop = img[y:y + h, x:x + w]

    save_path = os.path.join(output_path, filename)
    print('wpath:', save_path)
    if output_path:
        # makedirs also creates missing parent directories (e.g. 'work/roi/').
        os.makedirs(output_path, exist_ok=True)
    cv2.imwrite(save_path, img_crop)
    return result_img, img_crop
三 直接使用OCR算法识别
3.1 提取目标识别区域(找出文字区域)
# Directory that get_find_display writes the cropped ROI image into.
output_path = 'work/roi/'
# img_roi = 'test/133102_steerPoint5_preset1255_20220917221726_v.jpeg'
img_roi = 'test/number_item.jpg'
# Bounds handed to get_find_display, which uses them as the HSV range for
# cv2.inRange; here only the middle component has a non-zero lower bound (80).
lower = (0, 80, 0)
higher = (255, 255, 255)
# result_img: masked input image; img_crop: cropped region of interest.
result_img, img_crop = get_find_display(img_roi, lower, higher, output_path)
3.2 查看识别区域
# Path of the cropped image saved by the previous step
# (output directory 'work/roi/' + the input file name).
img_roi_path = 'work/roi/number_item.jpg'
imshow_image(img_roi_path)
3.3 直接识别
def rec_display_roi(img_roi):
    """Run recognition only (``det=False``) on *img_roi* with a default PaddleOCR.

    A new ``PaddleOCR()`` instance is created on every call.

    Returns:
        The first two items of the first entry of the OCR output
        (presumably the recognised text and its confidence; the exact
        layout depends on the PaddleOCR version, so verify).
    """
    recognizer = PaddleOCR()
    first_entry = recognizer.ocr(img_roi, det=False)[0]
    return first_entry[0], first_entry[1]
# Recognise the text in the cropped ROI image; the returned value is not stored.
rec_display_roi(img_roi_path)
3.4 直接使用图片进行识别
获取识别区
# Directory that get_find_display writes the cropped image into.
output_path = 'work/roi/'
img_roi = 'test/number_use.jpg'
# Both bounds span the full 0-255 range on every channel, so the colour
# filter in get_find_display does not exclude any pixel.
lower = (0, 0, 0)
higher = (255, 255, 255)
result_img, img_crop = get_find_display(img_roi, lower, higher, output_path)
# Shows the original input image (img_roi), not the saved crop.
imshow_image(img_roi)
进行识别
# Path of the image saved by the previous get_find_display call.
img_roi_path = "work/roi/number_use.jpg"
imshow_image(img_roi_path)
def rec_display_roi(img_roi):
    """Run recognition only (``det=False``) on *img_roi* using a GPU-enabled PaddleOCR.

    This definition replaces the earlier ``rec_display_roi``: it builds the
    engine with ``use_gpu=True`` (the CPU variant would be ``PaddleOCR()``)
    and returns only the first item of the first entry of the OCR output.
    """
    engine = PaddleOCR(use_gpu=True)
    return engine.ocr(img_roi, det=False)[0][0]
# NOTE(review): this passes img_roi (the original input image), not
# img_roi_path (the saved crop displayed just above) - confirm which is intended.
rec_display_roi(img_roi)
识别结果: ('0598', 0.7145649790763855)
四 直接使用OCR进行检测和识别
方案1,直接使用PaddleOCR对图像中所有可能的文字对象进行检测和识别,再从中筛选出需要的结果。
4.1 OCR识别读数
# The languages PaddleOCR supports are switched through the `lang` parameter,
# for example `ch`, `en`, `fr`, `german`, `korean`, `japan`.
ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory
# Run detection + recognition on one image and save a visualisation of it.
# img_path = 'test/133102_steerPoint5_preset1255_20220917221726_v.jpeg'
img_path = 'test/number_item.jpg'
save_path = 'work/dst/result.jpg'
result = ocr.ocr(img_path, cls=True)
# Each printed line is indexed below as [box, (text, score)].
for line in result:
    print(line)
image = Image.open(img_path).convert('RGB')
boxes = [line[0] for line in result]
txts = [line[1][0] for line in result]
scores = [line[1][1] for line in result]
im_show = draw_ocr(image, boxes, txts, scores, font_path='work/font/simfang.ttf')
im_show = Image.fromarray(im_show)
# Image.save does not create missing directories, so make sure the target exists.
os.makedirs(os.path.dirname(save_path), exist_ok=True)
im_show.save(save_path)
"""[[[1936.0, 56.0], [2461.0, 56.0], [2461.0, 109.0], [1936.0, 109.0]], ('2022-11-28 07:38:28', 0.8835511)]
[[[2461.0, 450.0], [2557.0, 450.0], [2557.0, 500.0], [2461.0, 500.0]], ('原水', 0.99717796)]
[[[12.0, 1310.0], [483.0, 1322.0], [481.0, 1404.0], [10.0, 1392.0]], ('水泵房仪表间', 0.93879247)]
im_show <PIL.Image.Image image mode=RGB size=1200x338 at 0x2852F99EB00>"""
五 算法优化
def write_to_csv(log_path, filename='', result=0.00, score=0, mode_head=True):
    """Append one row to the CSV log at *log_path*.

    Args:
        log_path: CSV file to append to; its parent directory is created
            when missing.
        filename: Value for the first column of a data row.
        result: Value for the second column of a data row.
        score: Value for the third column of a data row.
        mode_head: When true, the header row ``filename,result,score`` is
            written and the other arguments are ignored; otherwise a data
            row is written.
    """
    log_dir = os.path.dirname(log_path)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)
    row = ['filename', 'result', 'score'] if mode_head else [filename, result, score]
    # The context manager closes the file even if writing fails.
    with open(log_path, 'a+', encoding='utf-8', newline='') as file:
        csv.writer(file).writerow(row)


def get_bbox_area(box):
    """Return the area of the axis-aligned rectangle that encloses *box*.

    Args:
        box: Sequence of points, each indexable as ``point[0]`` (x) and
            ``point[1]`` (y).
    """
    xs = [point[0] for point in box]
    ys = [point[1] for point in box]
    return (max(xs) - min(xs)) * (max(ys) - min(ys))


def quadArea(nodes):
    """Return the area of the polygon described by *nodes*.

    Uses the cross-product (shoelace) formula, so the points must be listed
    in order around the polygon, either clockwise or counter-clockwise.
    """
    count = len(nodes)
    cross_sum = 0
    for i in range(count):
        nxt = (i + 1) % count
        cross_sum += nodes[i][0] * nodes[nxt][1] - nodes[nxt][0] * nodes[i][1]
    # The cross-product sum is twice the signed area.
    return abs(cross_sum) / 2


def bboxes_choose(boxes, txts, scores):
    """Keep only the detection whose box has the largest polygon area.

    Returns:
        ``(boxes, txts, scores)``, each a one-element list for the largest
        box, or three empty lists when *boxes* is empty.
    """
    if len(boxes) == 0:
        return [], [], []
    areas = [quadArea(box) for box in boxes]
    index = areas.index(max(areas))
    return [boxes[index]], [txts[index]], [scores[index]]


def ocr_roi_det(img_path, font, save_path='./work/save/'):
    """Run OCR on one image and return the text of the largest detected box.

    Detection and recognition are run with the module-level ``ocr`` object.
    The largest box is drawn with ``draw_ocr`` and the visualisation is saved
    as ``<save_path>/<name>_result.jpg``.

    Args:
        img_path: Path of the image to analyse.
        font: Font file passed to ``draw_ocr`` as ``font_path``.
        save_path: Directory for the visualisation; created when missing.

    Returns:
        ``(text, score)`` of the largest box, or ``('', 0.0)`` when nothing
        was detected (no visualisation is written in that case).
    """
    result = ocr.ocr(img_path, cls=True)
    # NOTE(review): an image without text is presumably reported as [] or
    # [None] depending on the PaddleOCR version - confirm. Either way there
    # is nothing to index, so report an empty reading instead of raising.
    if not result or result[0] is None:
        return '', 0.0

    image = Image.open(img_path).convert('RGB')
    fname = os.path.basename(img_path).split('.')[0]

    # Each entry is indexed as [box, (text, score)], e.g.
    # [[[151.0, 53.0], [277.0, 53.0], [277.0, 111.0], [151.0, 111.0]], ('00.2', 0.94)]
    boxes = [line[0] for line in result]
    txts = [line[1][0] for line in result]
    scores = [line[1][1] for line in result]
    boxes, txts, scores = bboxes_choose(boxes, txts, scores)

    im_show = Image.fromarray(draw_ocr(image, boxes, txts, scores, font_path=font))
    if save_path:
        # makedirs also creates missing parent directories.
        os.makedirs(save_path, exist_ok=True)
    im_show.save(os.path.join(save_path, fname + '_result.jpg'))
    return txts[0], scores[0]


def all_test_det(path, log_path, font, save_path):
    """Run ``ocr_roi_det`` on every file under *path* and log the results.

    A header row is appended to the CSV at *log_path*, then one row per file
    with a non-empty reading. Files with an empty reading are recorded with
    score -1 in the summary dictionary and are not written to the CSV. The
    summary counters and dictionary are printed at the end.

    Args:
        path: Root directory that is walked recursively for input files.
        log_path: CSV file that receives the results.
        font: Font file forwarded to ``ocr_roi_det``.
        save_path: Directory forwarded to ``ocr_roi_det`` for visualisations.
    """
    count = 0
    img_ans_dic = {}

    img_list = []
    for dirpath, _dirnames, filenames in os.walk(path):
        # os.walk yields directories without a trailing separator, so join properly.
        img_list.extend(os.path.join(dirpath, name) for name in filenames)

    write_to_csv(log_path)
    # Iterate over every collected file, including the last one.
    for img_roi in tqdm(img_list):
        fname = os.path.basename(img_roi).split('.')[0]
        result, score = ocr_roi_det(img_roi, font, save_path)
        print('result:', result, 'score:', score)
        if result == '':
            img_ans_dic[fname] = -1
            continue
        img_ans_dic[fname] = score
        count += 1
        write_to_csv(log_path, fname, result, score, False)

    print('count:', count)
    print('dict_len:', len(img_ans_dic))
    print('ans_dict:', img_ans_dic)
if __name__ == '__main__':
    # The languages PaddleOCR supports are switched through the `lang`
    # parameter, for example `ch`, `en`, `fr`, `german`, `korean`, `japan`.
    print('查看ocr数据模型')
    # Needs to run only once: downloads the model and loads it into memory.
    ocr = PaddleOCR(use_angle_cls=False, lang="en")
    print('ocr:', ocr)

    # NOTE(review): img_path is not assigned in this block (the line below is
    # commented out), so the value set earlier in the file is used - confirm.
    # img_path = 'test/number_use.jpg'
    font_path = 'work/font/simfang.ttf'
    a, b = ocr_roi_det(img_path, font_path)
    print('查看识别结果:', a, b)

    # Batch run over the test set; results go to the CSV log.
    log_path = 'work/log/result.csv'
    save_path = 'work/save_result/'
    test_path = 'work/dataset/test/'
    all_test_det(test_path, log_path, font_path, save_path)

    # Result inspection.
    # rs_img = 'work/save_result/133102_steerPoint5_preset1255_20220917221726_v_result.jpg'
    # rs_img = 'test/72635_steerPoint12_preset1294_20220919123447_v.jpeg'
    rs_img = 'test/number_use.jpg'
    imshow_image(rs_img)
    print('执行到最后位置》')
优化后的算法