From eae28dfcc6049cbe25d6e54f6e819d43c7b5db42 Mon Sep 17 00:00:00 2001 From: LDOUBLEV Date: Wed, 26 Oct 2022 10:29:46 +0800 Subject: [PATCH 001/112] refine paddle.js deploy doc --- deploy/paddlejs/README.md | 26 +++++++++++++++++++------- deploy/paddlejs/README_ch.md | 25 +++++++++++++++++++++---- 2 files changed, 40 insertions(+), 11 deletions(-) diff --git a/deploy/paddlejs/README.md b/deploy/paddlejs/README.md index e80b7cce..eef39b6c 100644 --- a/deploy/paddlejs/README.md +++ b/deploy/paddlejs/README.md @@ -1,14 +1,26 @@ -English| [简体中文](README_ch.md) +English| [简体中文](README_ch.md) -# Paddle.js +# Paddle.js Introduction -[Paddle.js](https://github.com/PaddlePaddle/Paddle.js) is a web project for Baidu PaddlePaddle, which is an open source deep learning framework running in the browser. Paddle.js can either load a pre-trained model, or transforming a model from paddle-hub with model transforming tools provided by Paddle.js. It could run in every browser with WebGL/WebGPU/WebAssembly supported. It could also run in Baidu Smartprogram and WX miniprogram. +[Paddle.js](https://github.com/PaddlePaddle/Paddle.js) is a web project for Baidu PaddlePaddle, which is an open source deep learning framework running in the browser. Paddle.js can either load a pre-trained model, or transforming a model from paddle-hub with model transforming tools provided by Paddle.js. It could run in every browser with WebGL/WebGPU/WebAssembly supported. It could also run in Baidu Smartprogram and wechat miniprogram. +## Web Demo +Run OCR demo in browser refer to [tutorial](https://github.com/PaddlePaddle/FastDeploy/blob/develop/examples/application/js/WebDemo.md). 
-- [Online experience](https://paddlejs.baidu.com/ocr) -- [Tutorial](https://github.com/PaddlePaddle/Paddle.js/blob/release/v2.2.3/packages/paddlejs-models/ocr/README_cn.md) -- Visualization: +|demo|web demo directory|visualization| +|-|-|-| +|PP-OCRv3|[TextDetection、TextRecognition](https://github.com/PaddlePaddle/FastDeploy/tree/develop/examples/application/js/web_demo/src/pages/cv/ocr/)|| + + +## Mini Program Demo +The Mini Program demo running tutorial reference +Run OCR demo in wechat miniprogram refer to [tutorial](https://github.com/PaddlePaddle/FastDeploy/tree/develop/examples/application/js/mini_program). + +|demo|directory| +|-|-| +|Text Detection| [ocrdetectXcx](https://github.com/PaddlePaddle/FastDeploy/tree/develop/examples/application/js/mini_program/ocrdetectXcx/) | +|Text Recognition| [ocrXcx](https://github.com/PaddlePaddle/FastDeploy/tree/develop/examples/application/js/mini_program/ocrXcx/) |
-
\ No newline at end of file + diff --git a/deploy/paddlejs/README_ch.md b/deploy/paddlejs/README_ch.md index 9e514df0..46666130 100644 --- a/deploy/paddlejs/README_ch.md +++ b/deploy/paddlejs/README_ch.md @@ -5,10 +5,27 @@ [Paddle.js](https://github.com/PaddlePaddle/Paddle.js) 是百度 PaddlePaddle 的 web 方向子项目,是一个运行在浏览器中的开源深度学习框架。Paddle.js 可以加载提前训练好的 paddle 模型,通过 Paddle.js 的模型转换工具 paddlejs-converter 变成浏览器友好的模型进行在线推理预测使用。目前,Paddle.js 可以在支持 WebGL/WebGPU/WebAssembly 的浏览器中运行,也可以在百度小程序和微信小程序环境下运行。 -- [在线体验](https://paddlejs.baidu.com/ocr) -- [直达教程](https://github.com/PaddlePaddle/Paddle.js/blob/release/v2.2.3/packages/paddlejs-models/ocr/README_cn.md) +## Web Demo使用 + +在浏览器中直接运行官方OCR demo参考[教程](https://github.com/PaddlePaddle/FastDeploy/blob/develop/examples/application/js/WebDemo.md) + +|demo名称|web demo目录|可视化| +|-|-|-| +|PP-OCRv3|[TextDetection、TextRecognition](https://github.com/PaddlePaddle/FastDeploy/tree/develop/examples/application/js/web_demo/src/pages/cv/ocr/)|| + + +## 微信小程序Demo使用 + +在微信小程序运行官方demo参考[教程](https://github.com/PaddlePaddle/FastDeploy/tree/develop/examples/application/js/mini_program) + +|名称|目录| +|-|-| +|OCR文本检测| [ocrdetecXcx](https://github.com/PaddlePaddle/FastDeploy/tree/develop/examples/application/js/mini_program/ocrdetectXcx/) | +|OCR文本识别| [ocrXcx](https://github.com/PaddlePaddle/FastDeploy/tree/develop/examples/application/js/mini_program/ocrXcx/) | + + - 效果:
- -
\ No newline at end of file + + -- Gitee From b1e37fa42cf21811d1c821d28eab8896126c170a Mon Sep 17 00:00:00 2001 From: zhoujun Date: Thu, 27 Oct 2022 11:16:54 +0800 Subject: [PATCH 002/112] add re export cmd in kie doc (#8123) --- doc/doc_ch/kie.md | 40 +++++++++++++++++++++++++++++++++++++++- doc/doc_en/kie_en.md | 41 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 78 insertions(+), 3 deletions(-) diff --git a/doc/doc_ch/kie.md b/doc/doc_ch/kie.md index b6f38a66..26d2e560 100644 --- a/doc/doc_ch/kie.md +++ b/doc/doc_ch/kie.md @@ -438,7 +438,25 @@ inference/ser_vi_layoutxlm/ └── inference.pdmodel # inference模型的模型结构文件 ``` -RE任务的动转静过程适配中,敬请期待。 +信息抽取模型中的RE任务转inference模型步骤如下: + +``` bash +# -c 后面设置训练算法的yml配置文件 +# -o 配置可选参数 +# Architecture.Backbone.checkpoints 参数设置待转换的训练模型地址 +# Global.save_inference_dir 参数设置转换的模型将保存的地址 + +python3 tools/export_model.py -c configs/kie/vi_layoutxlm/re_vi_layoutxlm_xfund_zh.yml -o Architecture.Backbone.checkpoints=./output/re_vi_layoutxlm_xfund_zh/best_accuracy Global.save_inference_dir=./inference/re_vi_layoutxlm +``` + +转换成功后,在目录下有三个文件: + +``` +inference/re_vi_layoutxlm/ + ├── inference.pdiparams # inference模型的参数文件 + ├── inference.pdiparams.info # inference模型的参数信息,可忽略 + └── inference.pdmodel # inference模型的模型结构文件 +``` ## 4.2 模型推理 @@ -461,6 +479,26 @@ python3 kie/predict_kie_token_ser.py \ +VI-LayoutXLM模型基于RE任务进行推理,可以执行如下命令: + +```bash +cd ppstructure +python3 kie/predict_kie_token_ser_re.py \ + --kie_algorithm=LayoutXLM \ + --re_model_dir=../inference/re_vi_layoutxlm \ + --ser_model_dir=../inference/ser_vi_layoutxlm \ + --use_visual_backbone=False \ + --image_dir=./docs/kie/input/zh_val_42.jpg \ + --ser_dict_path=../train_data/XFUND/class_list_xfun.txt \ + --vis_font_path=../doc/fonts/simfang.ttf \ + --ocr_order_method="tb-yx" +``` + +RE可视化结果默认保存到`./output`文件夹里面,结果示例如下: + +
+ +
# 5. FAQ diff --git a/doc/doc_en/kie_en.md b/doc/doc_en/kie_en.md index 0c335a5c..cd1fffb2 100644 --- a/doc/doc_en/kie_en.md +++ b/doc/doc_en/kie_en.md @@ -457,14 +457,31 @@ inference/ser_vi_layoutxlm/ └── inference.pdmodel # The program file of recognition ``` -Export of RE model is also in adaptation. +The RE model can be converted to the inference model using the following command. + +```bash +# -c Set the training algorithm yml configuration file. +# -o Set optional parameters. +# Architecture.Backbone.checkpoints Set the training model address. +# Global.save_inference_dir Set the address where the converted model will be saved. +python3 tools/export_model.py -c configs/kie/vi_layoutxlm/re_vi_layoutxlm_xfund_zh.yml -o Architecture.Backbone.checkpoints=./output/re_vi_layoutxlm_xfund_zh/best_accuracy Global.save_inference_dir=./inference/re_vi_layoutxlm +``` + +After the conversion is successful, there are three files in the model save directory: + +``` +inference/re_vi_layoutxlm/ + ├── inference.pdiparams # The parameter file of recognition inference model + ├── inference.pdiparams.info # The parameter information of recognition inference model, which can be ignored + └── inference.pdmodel # The program file of recognition +``` ## 4.2 Model inference The VI layoutxlm model performs reasoning based on the ser task, and can execute the following commands: -Using the following command to infer the VI-LayoutXLM model. +Using the following command to infer the VI-LayoutXLM SER model. ```bash cd ppstructure @@ -483,6 +500,26 @@ The visualized result will be saved in `./output`, which is shown as follows. +Using the following command to infer the VI-LayoutXLM RE model. 
+ +```bash +cd ppstructure +python3 kie/predict_kie_token_ser_re.py \ + --kie_algorithm=LayoutXLM \ + --re_model_dir=../inference/re_vi_layoutxlm \ + --ser_model_dir=../inference/ser_vi_layoutxlm \ + --use_visual_backbone=False \ + --image_dir=./docs/kie/input/zh_val_42.jpg \ + --ser_dict_path=../train_data/XFUND/class_list_xfun.txt \ + --vis_font_path=../doc/fonts/simfang.ttf \ + --ocr_order_method="tb-yx" +``` + +The visualized result will be saved in `./output`, which is shown as follows. + +
+ +
# 5. FAQ -- Gitee From b8cde49ee3f55b3e968ac20259245a88727ce996 Mon Sep 17 00:00:00 2001 From: LDOUBLEV Date: Thu, 27 Oct 2022 15:37:15 +0800 Subject: [PATCH 003/112] add min_area_rect_crop --- tools/infer/predict_system.py | 7 +++++-- tools/infer/utility.py | 23 +++++++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/tools/infer/predict_system.py b/tools/infer/predict_system.py index affd0d1b..234d8ad5 100755 --- a/tools/infer/predict_system.py +++ b/tools/infer/predict_system.py @@ -34,7 +34,7 @@ import tools.infer.predict_det as predict_det import tools.infer.predict_cls as predict_cls from ppocr.utils.utility import get_image_file_list, check_and_read from ppocr.utils.logging import get_logger -from tools.infer.utility import draw_ocr_box_txt, get_rotate_crop_image +from tools.infer.utility import draw_ocr_box_txt, get_rotate_crop_image, get_minarea_rect_crop logger = get_logger() @@ -80,7 +80,10 @@ class TextSystem(object): for bno in range(len(dt_boxes)): tmp_box = copy.deepcopy(dt_boxes[bno]) - img_crop = get_rotate_crop_image(ori_im, tmp_box) + if args.det_box_type == "quad": + img_crop = get_rotate_crop_image(ori_im, tmp_box) + else: + img_crop = get_minarea_rect_crop(ori_im, tmp_box) img_crop_list.append(img_crop) if self.use_angle_cls and cls: img_crop_list, angle_list, elapse = self.text_classifier( diff --git a/tools/infer/utility.py b/tools/infer/utility.py index 34cad259..207b2e5f 100644 --- a/tools/infer/utility.py +++ b/tools/infer/utility.py @@ -629,6 +629,29 @@ def get_rotate_crop_image(img, points): return dst_img +def get_minarea_rect_crop(img, points): + bounding_box = cv2.minAreaRect(np.array(points).astype(np.int32)) + points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0]) + + index_a, index_b, index_c, index_d = 0, 1, 2, 3 + if points[1][1] > points[0][1]: + index_a = 0 + index_d = 1 + else: + index_a = 1 + index_d = 0 + if points[3][1] > points[2][1]: + index_b = 2 + index_c = 3 + else: + 
index_b = 3 + index_c = 2 + + box = [points[index_a], points[index_b], points[index_c], points[index_d]] + crop_img = get_rotate_crop_image(img, np.array(box)) + return crop_img + + def check_gpu(use_gpu): if use_gpu and not paddle.is_compiled_with_cuda(): use_gpu = False -- Gitee From 9d9591533aae5136d33601a6186ea5d52e90cff2 Mon Sep 17 00:00:00 2001 From: LDOUBLEV Date: Thu, 27 Oct 2022 15:39:31 +0800 Subject: [PATCH 004/112] add min_area_rect_crop --- tools/infer/predict_system.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/infer/predict_system.py b/tools/infer/predict_system.py index 234d8ad5..19e0525a 100755 --- a/tools/infer/predict_system.py +++ b/tools/infer/predict_system.py @@ -80,7 +80,7 @@ class TextSystem(object): for bno in range(len(dt_boxes)): tmp_box = copy.deepcopy(dt_boxes[bno]) - if args.det_box_type == "quad": + if self.args.det_box_type == "quad": img_crop = get_rotate_crop_image(ori_im, tmp_box) else: img_crop = get_minarea_rect_crop(ori_im, tmp_box) -- Gitee From 84ec4503ef8f5333b039fc54f8af56bd1d8dfb1c Mon Sep 17 00:00:00 2001 From: andyjpaddle Date: Thu, 27 Oct 2022 07:51:45 +0000 Subject: [PATCH 005/112] rm visionlan invalid params --- ppocr/data/imaug/label_ops.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ppocr/data/imaug/label_ops.py b/ppocr/data/imaug/label_ops.py index 63c5d6aa..8f56ea10 100644 --- a/ppocr/data/imaug/label_ops.py +++ b/ppocr/data/imaug/label_ops.py @@ -1396,10 +1396,9 @@ class VLLabelEncode(BaseRecLabelEncode): max_text_length, character_dict_path=None, use_space_char=False, - lower=True, **kwargs): - super(VLLabelEncode, self).__init__( - max_text_length, character_dict_path, use_space_char, lower) + super(VLLabelEncode, self).__init__(max_text_length, + character_dict_path, use_space_char) self.dict = {} for i, char in enumerate(self.character): self.dict[char] = i -- Gitee From 4604e7688092e3076ae4a3ea6e893b90889dd6fd Mon Sep 17 00:00:00 2001 From: Evezerest 
<50011306+Evezerest@users.noreply.github.com> Date: Fri, 28 Oct 2022 19:39:33 +0800 Subject: [PATCH 006/112] Update requirements.txt --- PPOCRLabel/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PPOCRLabel/requirements.txt b/PPOCRLabel/requirements.txt index d66dba23..fd42a2de 100644 --- a/PPOCRLabel/requirements.txt +++ b/PPOCRLabel/requirements.txt @@ -1,3 +1,3 @@ pyqt5 -paddleocr -xlrd==1.2.0 \ No newline at end of file +paddleocr==2.6.0.0 +xlrd==1.2.0 -- Gitee From 5f06a8068eed3e1755b8ca56004cd46c95ac268d Mon Sep 17 00:00:00 2001 From: Double_V Date: Fri, 4 Nov 2022 14:04:17 +0800 Subject: [PATCH 007/112] fix PP-OCRv3 det train (#8208) --- ...Rv3_det_train.md => PP-OCRv3_det_train.md} | 28 +- doc/doc_ch/PP-OCRv3_introduction.md | 2 + doc/doc_en/PP-OCRv3_det_train_en.md | 253 ++++++++++++++++++ doc/doc_en/PP-OCRv3_introduction_en.md | 1 + 4 files changed, 270 insertions(+), 14 deletions(-) rename doc/doc_ch/{PPOCRv3_det_train.md => PP-OCRv3_det_train.md} (92%) create mode 100644 doc/doc_en/PP-OCRv3_det_train_en.md diff --git a/doc/doc_ch/PPOCRv3_det_train.md b/doc/doc_ch/PP-OCRv3_det_train.md similarity index 92% rename from doc/doc_ch/PPOCRv3_det_train.md rename to doc/doc_ch/PP-OCRv3_det_train.md index 601acdde..b3bbc896 100644 --- a/doc/doc_ch/PPOCRv3_det_train.md +++ b/doc/doc_ch/PP-OCRv3_det_train.md @@ -1,14 +1,16 @@ +[English](../doc_en/PP-OCRv3_det_train_en.md) | 简体中文 + # PP-OCRv3 文本检测模型训练 - [1. 简介](#1) -- [2. PPOCRv3检测训练](#2) -- [3. 基于PPOCRv3检测的finetune训练](#3) +- [2. PP-OCRv3检测训练](#2) +- [3. 基于PP-OCRv3检测的finetune训练](#3) ## 1. 
简介 -PP-OCRv3在PP-OCRv2的基础上进一步升级。本节介绍PP-OCRv3检测模型的训练步骤。有关PPOCRv3策略介绍参考[文档](./PP-OCRv3_introduction.md)。 +PP-OCRv3在PP-OCRv2的基础上进一步升级。本节介绍PP-OCRv3检测模型的训练步骤。有关PP-OCRv3策略介绍参考[文档](./PP-OCRv3_introduction.md)。 @@ -55,10 +57,10 @@ python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/ 训练过程中保存的模型在output目录下,包含以下文件: ``` -best_accuracy.states +best_accuracy.states best_accuracy.pdparams # 默认保存最优精度的模型参数 best_accuracy.pdopt # 默认保存最优精度的优化器相关参数 -latest.states +latest.states latest.pdparams # 默认保存的最新模型参数 latest.pdopt # 默认保存的最新模型的优化器相关参数 ``` @@ -145,19 +147,19 @@ paddle.save(s_params, "./pretrain_models/cml_student.pdparams") -## 3. 基于PPOCRv3检测finetune训练 +## 3. 基于PP-OCRv3检测finetune训练 -本节介绍如何使用PPOCRv3检测模型在其他场景上的finetune训练。 +本节介绍如何使用PP-OCRv3检测模型在其他场景上的finetune训练。 finetune训练适用于三种场景: -- 基于CML蒸馏方法的finetune训练,适用于教师模型在使用场景上精度高于PPOCRv3检测模型,且希望得到一个轻量检测模型。 -- 基于PPOCRv3轻量检测模型的finetune训练,无需训练教师模型,希望在PPOCRv3检测模型基础上提升使用场景上的精度。 +- 基于CML蒸馏方法的finetune训练,适用于教师模型在使用场景上精度高于PP-OCRv3检测模型,且希望得到一个轻量检测模型。 +- 基于PP-OCRv3轻量检测模型的finetune训练,无需训练教师模型,希望在PP-OCRv3检测模型基础上提升使用场景上的精度。 - 基于DML蒸馏方法的finetune训练,适用于采用DML方法进一步提升精度的场景。 **基于CML蒸馏方法的finetune训练** -下载PPOCRv3训练模型: +下载PP-OCRv3训练模型: ``` wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar tar xf ch_PP-OCRv3_det_distill_train.tar @@ -177,10 +179,10 @@ python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs Global.save_model_dir=./output/ ``` -**基于PPOCRv3轻量检测模型的finetune训练** +**基于PP-OCRv3轻量检测模型的finetune训练** -下载PPOCRv3训练模型,并提取Student结构的模型参数: +下载PP-OCRv3训练模型,并提取Student结构的模型参数: ``` wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar tar xf ch_PP-OCRv3_det_distill_train.tar @@ -248,5 +250,3 @@ python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/ Architecture.Models.Student2.pretrained=./teacher \ Global.save_model_dir=./output/ ``` - - diff --git a/doc/doc_ch/PP-OCRv3_introduction.md b/doc/doc_ch/PP-OCRv3_introduction.md index 
446af23e..5ef16fc7 100644 --- a/doc/doc_ch/PP-OCRv3_introduction.md +++ b/doc/doc_ch/PP-OCRv3_introduction.md @@ -63,6 +63,8 @@ PP-OCRv3检测模型是对PP-OCRv2中的[CML](https://arxiv.org/pdf/2109.03144.p 测试环境: Intel Gold 6148 CPU,预测时开启MKLDNN加速。 +PP-OCRv3检测模型训练步骤参考[文档](./PP-OCRv3_det_train.md) + **(1)LK-PAN:大感受野的PAN结构** LK-PAN (Large Kernel PAN) 是一个具有更大感受野的轻量级[PAN](https://arxiv.org/pdf/1803.01534.pdf)结构,核心是将PAN结构的path augmentation中卷积核从`3*3`改为`9*9`。通过增大卷积核,提升特征图每个位置覆盖的感受野,更容易检测大字体的文字以及极端长宽比的文字。使用LK-PAN结构,可以将教师模型的hmean从83.2%提升到85.0%。 diff --git a/doc/doc_en/PP-OCRv3_det_train_en.md b/doc/doc_en/PP-OCRv3_det_train_en.md new file mode 100644 index 00000000..693d8e41 --- /dev/null +++ b/doc/doc_en/PP-OCRv3_det_train_en.md @@ -0,0 +1,253 @@ +English | [简体中文](../doc_ch/PP-OCRv3_det_train.md) + + +# The training steps of PP-OCRv3 text detection model + +- [1. Introduction](#1) +- [2. PP-OCRv3 detection training](#2) +- [3. Finetune training based on PP-OCRv3 detection](#3) + + +## 1 Introduction + +PP-OCRv3 is further upgraded on the basis of PP-OCRv2. This section describes the training steps of the PP-OCRv3 detection model. Refer to [documentation](./ppocr_introduction_en.md) for PP-OCRv3 introduction. + + + +## 2. Detection training + +The PP-OCRv3 detection model is an upgrade of the [CML](https://arxiv.org/pdf/2109.03144.pdf) (Collaborative Mutual Learning) collaborative mutual learning text detection distillation strategy in PP-OCRv2. PP-OCRv3 is further optimized for detecting teacher model and student model respectively. Among them, when optimizing the teacher model, the PAN structure LK-PAN with large receptive field and the DML (Deep Mutual Learning) distillation strategy are proposed. when optimizing the student model, the FPN structure RSE-FPN with residual attention mechanism is proposed. 
+ +PP-OCRv3 detection training consists of two steps: +- Step 1: Train detection teacher model using DML distillation method +- Step 2: Use the teacher model obtained in Step 1 to train a lightweight student model using the CML method + + +### 2.1 Prepare data and environment + +The training data adopts icdar2015 data, and the steps to prepare the training set refer to [ocr_dataset](./dataset/ocr_datasets.md). + +Runtime environment preparation reference [documentation](./installation_en.md). + +### 2.2 Train the teacher model + +The configuration file for teacher model training is [ch_PP-OCRv3_det_dml.yml](https://github.com/PaddlePaddle/PaddleOCR/blob/release%2F2.5/configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_dml.yml). The Backbone, Neck, and Head of the model structure of the teacher model are Resnet50, LKPAN, and DBHead, respectively, and are trained by the distillation method of DML. Refer to [documentation](./knowledge_distillation) for a detailed introduction to configuration files. + + +Download ImageNet pretrained models: +```` +# Download the pretrained model of ResNet50_vd +wget -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/pretrained/ResNet50_vd_ssld_pretrained.pdparams +```` + +**Start training** +```` +# Single GPU training +python3 tools/train.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_dml.yml \ + -o Architecture.Models.Student.pretrained=./pretrain_models/ResNet50_vd_ssld_pretrained \ + Architecture.Models.Student2.pretrained=./pretrain_models/ResNet50_vd_ssld_pretrained \ + Global.save_model_dir=./output/ + +# If you want to use multi-GPU distributed training, use the following command: +python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_dml.yml \ + -o Architecture.Models.Student.pretrained=./pretrain_models/ResNet50_vd_ssld_pretrained \ + Architecture.Models.Student2.pretrained=./pretrain_models/ResNet50_vd_ssld_pretrained \ + Global.save_model_dir=./output/ +```` + +The model 
saved during training is in the output directory and contains the following files: +```` +best_accuracy.states +best_accuracy.pdparams # The model parameters with the best accuracy are saved by default +best_accuracy.pdopt # optimizer-related parameters that save optimal accuracy by default +latest.states +latest.pdparams # The latest model parameters saved by default +latest.pdopt # Optimizer related parameters of the latest model saved by default +```` +Among them, best_accuracy is the saved model parameter with the highest accuracy, which can be directly evaluated using this model. + +The model evaluation command is as follows: +```` +python3 tools/eval.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_dml.yml -o Global.checkpoints=./output/best_accuracy +```` + +The trained teacher model has a larger structure and higher accuracy, which is used to improve the accuracy of the student model. + +**Extract teacher model parameters** +best_accuracy contains the parameters of two models, corresponding to Student and Student2 in the configuration file respectively. The method of extracting the parameters of Student is as follows: + +```` +import paddle +# load pretrained model +all_params = paddle.load("output/best_accuracy.pdparams") +# View the keys of the weight parameter +print(all_params.keys()) +# model weight extraction +s_params = {key[len("Student."):]: all_params[key] for key in all_params if "Student." in key} +# View the keys of the model weight parameters +print(s_params.keys()) +# save +paddle.save(s_params, "./pretrain_models/dml_teacher.pdparams") +```` + +The extracted model parameters can be used for further finetune training or distillation training of the model. 
+ + +### 2.3 Train the student model + +The configuration file for training the student model is [ch_PP-OCRv3_det_cml.yml](https://github.com/PaddlePaddle/PaddleOCR/blob/release%2F2.5/configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml) +The teacher model trained in the previous section is used as supervision, and the lightweight student model is obtained by training in CML. + +Download the ImageNet pretrained model for the student model: +```` +# Download the pre-trained model of MobileNetV3 +wget -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/pretrained/MobileNetV3_large_x0_5_pretrained.pdparams +```` + +**Start training** + +```` +# Single card training +python3 tools/train.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml \ + -o Architecture.Models.Student.pretrained=./pretrain_models/MobileNetV3_large_x0_5_pretrained \ + Architecture.Models.Student2.pretrained=./pretrain_models/MobileNetV3_large_x0_5_pretrained \ + Architecture.Models.Teacher.pretrained=./pretrain_models/dml_teacher \ + Global.save_model_dir=./output/ +# If you want to use multi-GPU distributed training, use the following command: +python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml \ + -o Architecture.Models.Student.pretrained=./pretrain_models/MobileNetV3_large_x0_5_pretrained \ + Architecture.Models.Student2.pretrained=./pretrain_models/MobileNetV3_large_x0_5_pretrained \ + Architecture.Models.Teacher.pretrained=./pretrain_models/dml_teacher \ + Global.save_model_dir=./output/ +```` + +The model saved during training is in the output directory, +The model evaluation command is as follows: +```` +python3 tools/eval.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml -o Global.checkpoints=./output/best_accuracy +```` + +best_accuracy contains three model parameters, corresponding to Student, Student2, and Teacher in the configuration file. 
The method to extract the Student parameter is as follows: + +```` +import paddle +# load pretrained model +all_params = paddle.load("output/best_accuracy.pdparams") +# View the keys of the weight parameter +print(all_params.keys()) +# model weight extraction +s_params = {key[len("Student."):]: all_params[key] for key in all_params if "Student." in key} +# View the keys of the model weight parameters +print(s_params.keys()) +# save +paddle.save(s_params, "./pretrain_models/cml_student.pdparams") +```` + +The extracted parameters of Student can be used for model deployment or further finetune training. + + + + +## 3. Finetune training based on PP-OCRv3 detection + +This section describes how to use the finetune training of the PP-OCRv3 detection model on other scenarios. + +finetune training applies to three scenarios: +- The finetune training based on the CML distillation method is suitable for the teacher model whose accuracy is higher than the PP-OCRv3 detection model in the usage scene, and a lightweight detection model is desired. +- Finetune training based on the PP-OCRv3 lightweight detection model, without the need to train the teacher model, hoping to improve the accuracy of the usage scenarios based on the PP-OCRv3 detection model. +- The finetune training based on the DML distillation method is suitable for scenarios where the DML method is used to further improve the accuracy. + + +**finetune training based on CML distillation method** + +Download the PP-OCRv3 training model: +```` +wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar +tar xf ch_PP-OCRv3_det_distill_train.tar +```` +ch_PP-OCRv3_det_distill_train/best_accuracy.pdparams contains the parameters of the Student, Student2, and Teacher models in the CML configuration file. 
+ +Start training: + +```` +# Single card training +python3 tools/train.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml \ + -o Global.pretrained_model=./ch_PP-OCRv3_det_distill_train/best_accuracy \ + Global.save_model_dir=./output/ +# If you want to use multi-GPU distributed training, use the following command: +python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml \ + -o Global.pretrained_model=./ch_PP-OCRv3_det_distill_train/best_accuracy \ + Global.save_model_dir=./output/ +```` + +**finetune training based on PP-OCRv3 lightweight detection model** + + +Download the PP-OCRv3 training model and extract the model parameters of the Student structure: +```` +wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar +tar xf ch_PP-OCRv3_det_distill_train.tar +```` + +The method to extract the Student parameter is as follows: + +```` +import paddle +# load pretrained model +all_params = paddle.load("output/best_accuracy.pdparams") +# View the keys of the weight parameter +print(all_params.keys()) +# model weight extraction +s_params = {key[len("Student."):]: all_params[key] for key in all_params if "Student." in key} +# View the keys of the model weight parameters +print(s_params.keys()) +# save +paddle.save(s_params, "./student.pdparams") +```` + +Trained using the configuration file [ch_PP-OCRv3_det_student.yml](https://github.com/PaddlePaddle/PaddleOCR/blob/release%2F2.5/configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_student.yml). 
+ +**Start training** + +```` +# Single card training +python3 tools/train.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_student.yml \ + -o Global.pretrained_model=./student \ + Global.save_model_dir=./output/ +# If you want to use multi-GPU distributed training, use the following command: +python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_student.yml \ + -o Global.pretrained_model=./student \ + Global.save_model_dir=./output/ +```` + + +**finetune training based on DML distillation method** + +Taking the Teacher model in ch_PP-OCRv3_det_distill_train as an example, first extract the parameters of the Teacher structure as follows: +```` +import paddle +# load pretrained model +all_params = paddle.load("ch_PP-OCRv3_det_distill_train/best_accuracy.pdparams") +# View the keys of the weight parameter +print(all_params.keys()) +# model weight extraction +s_params = {key[len("Teacher."):]: all_params[key] for key in all_params if "Teacher." 
in key} +# View the keys of the model weight parameters +print(s_params.keys()) +# save +paddle.save(s_params, "./teacher.pdparams") +```` + +**Start training** +```` +# Single card training +python3 tools/train.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_dml.yml \ + -o Architecture.Models.Student.pretrained=./teacher \ + Architecture.Models.Student2.pretrained=./teacher \ + Global.save_model_dir=./output/ +# If you want to use multi-GPU distributed training, use the following command: +python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_dml.yml \ + -o Architecture.Models.Student.pretrained=./teacher \ + Architecture.Models.Student2.pretrained=./teacher \ + Global.save_model_dir=./output/ +```` diff --git a/doc/doc_en/PP-OCRv3_introduction_en.md b/doc/doc_en/PP-OCRv3_introduction_en.md index 8d5a36ed..fe80b684 100644 --- a/doc/doc_en/PP-OCRv3_introduction_en.md +++ b/doc/doc_en/PP-OCRv3_introduction_en.md @@ -65,6 +65,7 @@ The ablation experiments are as follows: Testing environment: Intel Gold 6148 CPU, with MKLDNN acceleration enabled during inference. 
+The training steps of PP-OCRv3 detection model refer to [tutorial](./PP-OCRv3_det_train_en.md) **(1) LK-PAN: A PAN structure with large receptive field** -- Gitee From 070463918719ef75c3962afb6d2662c6aecc28f2 Mon Sep 17 00:00:00 2001 From: Double_V Date: Wed, 9 Nov 2022 20:26:32 +0800 Subject: [PATCH 008/112] [doc] fix east doc (#8254) * fix PP-OCRv3 det train * fix doc --- doc/doc_ch/algorithm_det_east.md | 6 ++++-- doc/doc_en/algorithm_det_east_en.md | 5 +++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/doc/doc_ch/algorithm_det_east.md b/doc/doc_ch/algorithm_det_east.md index 94a0d097..c2bb70a5 100644 --- a/doc/doc_ch/algorithm_det_east.md +++ b/doc/doc_ch/algorithm_det_east.md @@ -26,8 +26,10 @@ |模型|骨干网络|配置文件|precision|recall|Hmean|下载链接| | --- | --- | --- | --- | --- | --- | --- | -|EAST|ResNet50_vd|88.71%| 81.36%| 84.88%| [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_east_v2.0_train.tar)| -|EAST| MobileNetV3| 78.20%| 79.10%| 78.65%| [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_east_v2.0_train.tar)| +|EAST|ResNet50_vd| [det_r50_vd_east.yml](../../configs/det/det_r50_vd_east.yml)|88.71%| 81.36%| 84.88%| [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_east_v2.0_train.tar)| +|EAST|MobileNetV3|[det_mv3_east.yml](../../configs/det/det_mv3_east.yml) | 78.20%| 79.10%| 78.65%| [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_east_v2.0_train.tar)| + + diff --git a/doc/doc_en/algorithm_det_east_en.md b/doc/doc_en/algorithm_det_east_en.md index 3848464a..85440deb 100644 --- a/doc/doc_en/algorithm_det_east_en.md +++ b/doc/doc_en/algorithm_det_east_en.md @@ -26,8 +26,9 @@ On the ICDAR2015 dataset, the text detection result is as follows: |Model|Backbone|Configuration|Precision|Recall|Hmean|Download| | --- | --- | --- | --- | --- | --- | --- | -|EAST|ResNet50_vd|88.71%| 81.36%| 84.88%| [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_east_v2.0_train.tar)| -|EAST| 
MobileNetV3| 78.20%| 79.10%| 78.65%| [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_east_v2.0_train.tar)| +|EAST|ResNet50_vd| [det_r50_vd_east.yml](../../configs/det/det_r50_vd_east.yml)|88.71%| 81.36%| 84.88%| [model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_east_v2.0_train.tar)| +|EAST|MobileNetV3|[det_mv3_east.yml](../../configs/det/det_mv3_east.yml) | 78.20%| 79.10%| 78.65%| [model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_east_v2.0_train.tar)| + -- Gitee From 82ae953c68d6eb8ea9f1776a9d9c287370402076 Mon Sep 17 00:00:00 2001 From: littletomatodonkey Date: Tue, 15 Nov 2022 10:40:27 +0800 Subject: [PATCH 009/112] fix finetune (#8302) --- doc/doc_ch/finetune.md | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/doc/doc_ch/finetune.md b/doc/doc_ch/finetune.md index 973c4cb1..2aff0c65 100644 --- a/doc/doc_ch/finetune.md +++ b/doc/doc_ch/finetune.md @@ -26,21 +26,11 @@ PaddleOCR提供的PP-OCR系列模型在通用场景中性能优异,能够解 ### 2.2 模型选择 -建议选择PP-OCRv2模型(配置文件:[ch_PP-OCRv2_det_student.yml](../../configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_student.yml),预训练模型:[ch_PP-OCRv2_det_distill_train.tar](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar))进行微调,其精度与泛化性能是目前提供的最优预训练模型。 +建议选择PP-OCRv3模型(配置文件:[ch_PP-OCRv3_det_student.yml](../../configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_student.yml),预训练模型:[ch_PP-OCRv3_det_distill_train.tar](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar))进行微调,其精度与泛化性能是目前提供的最优预训练模型。 -更多PP-OCR系列模型,请参考[PaddleOCR 首页说明文档](../../README_ch.md)。 +更多PP-OCR系列模型,请参考[PP-OCR 系列模型库](./models_list.md)。 -注意:在使用上述预训练模型的时候,由于保存的模型中包含教师模型,因此需要将其中的学生模型单独提取出来,再加载学生模型即可进行模型微调。 - -```python -import paddle -# 加载完整的检测预训练模型 -a = paddle.load("ch_PP-OCRv2_det_distill_train/best_accuracy.pdparams") -# 提取学生模型的参数 -b = {k[len("student_model."):]: a[k] for k in a if "student_model." 
in k} -# 保存模型,用于后续模型微调 -paddle.save(b, "ch_PP-OCRv2_det_student.pdparams") -``` +注意:在使用上述预训练模型的时候,需要使用文件夹中的`student.pdparams`文件作为预训练模型,即,仅使用学生模型。 ### 2.3 训练超参选择 @@ -49,7 +39,7 @@ paddle.save(b, "ch_PP-OCRv2_det_student.pdparams") ```yaml Global: - pretrained_model: ./pretrain_models/student.pdparams # 预训练模型路径 + pretrained_model: ./ch_PP-OCRv3_det_distill_train/student.pdparams # 预训练模型路径 Optimizer: lr: name: Cosine @@ -67,7 +57,7 @@ Train: num_workers: 4 ``` -上述配置文件中,首先需要将`pretrained_model`字段指定为2.2章节中提取出来的`ch_PP-OCRv2_det_student.pdparams`文件路径。 +上述配置文件中,首先需要将`pretrained_model`字段指定为`student.pdparams`文件路径。 PaddleOCR提供的配置文件是在8卡训练(相当于总的batch size是`8*8=64`)、且没有加载预训练模型情况下的配置文件,因此您的场景中,学习率与总的batch size需要对应线性调整,例如 @@ -88,7 +78,7 @@ PaddleOCR提供的配置文件是在8卡训练(相当于总的batch size是`8* | det_db_score_mode | str | "fast" | DB的检测结果得分计算方法,支持`fast`和`slow`,`fast`是根据polygon的外接矩形边框内的所有像素计算平均得分,`slow`是根据原始polygon内的所有像素计算平均得分,计算速度相对较慢一些,但是更加准确一些。 | -更多关于推理方法的介绍可以参考[Paddle Inference推理教程](./inference.md)。 +更多关于推理方法的介绍可以参考[Paddle Inference推理教程](././inference_ppocr.md)。 ## 3. 
文本识别模型微调 @@ -109,9 +99,9 @@ PaddleOCR提供的配置文件是在8卡训练(相当于总的batch size是`8* ### 3.2 模型选择 -建议选择PP-OCRv2模型(配置文件:[ch_PP-OCRv2_rec_distillation.yml](../../configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml),预训练模型:[ch_PP-OCRv2_rec_train.tar](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_train.tar))进行微调,其精度与泛化性能是目前提供的最优预训练模型。 +建议选择PP-OCRv3模型(配置文件:[ch_PP-OCRv3_rec_distillation.yml](../../configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml),预训练模型:[ch_PP-OCRv3_rec_train.tar](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_train.tar))进行微调,其精度与泛化性能是目前提供的最优预训练模型。 -更多PP-OCR系列,模型请参考[PaddleOCR 首页说明文档](../../README_ch.md)。 +更多PP-OCR系列模型,请参考[PP-OCR 系列模型库](./models_list.md)。 ### 3.3 训练超参选择 @@ -147,7 +137,7 @@ Train: ``` -上述配置文件中,首先需要将`pretrained_model`字段指定为2.2章节中解压得到的`ch_PP-OCRv2_rec_train/best_accuracy.pdparams`文件路径。 +上述配置文件中,首先需要将`pretrained_model`字段指定为2.2章节中解压得到的`ch_PP-OCRv3_rec_train/best_accuracy.pdparams`文件路径。 PaddleOCR提供的配置文件是在8卡训练(相当于总的batch size是`8*128=1024`)、且没有加载预训练模型情况下的配置文件,因此您的场景中,学习率与总的batch size需要对应线性调整,例如: @@ -175,5 +165,4 @@ Train: ### 3.4 训练调优 -训练过程并非一蹴而就的,完成一个阶段的训练评估后,建议收集分析当前模型在真实场景中的 badcase,有针对性的调整训练数据比例,或者进一步新增合成数据。 -通过多次迭代训练,不断优化模型效果。 +训练过程并非一蹴而就的,完成一个阶段的训练评估后,建议收集分析当前模型在真实场景中的 badcase,有针对性的调整训练数据比例,或者进一步新增合成数据。通过多次迭代训练,不断优化模型效果。 -- Gitee From f36aa02670ac8f58435e8c000eb724ed76c0e1c5 Mon Sep 17 00:00:00 2001 From: Evezerest <50011306+Evezerest@users.noreply.github.com> Date: Tue, 15 Nov 2022 19:10:34 +0800 Subject: [PATCH 010/112] Update requirements.txt --- deploy/avh/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/deploy/avh/requirements.txt b/deploy/avh/requirements.txt index 1bf86ed1..a1a8626f 100644 --- a/deploy/avh/requirements.txt +++ b/deploy/avh/requirements.txt @@ -1,3 +1,4 @@ paddlepaddle numpy -opencv-python \ No newline at end of file +opencv-python +typing-extensions -- Gitee From 0c69f1f3f17f5a63fdc921d02c80af6bf24bc7f9 Mon Sep 17 00:00:00 2001 From: zhoujun Date: Wed, 
16 Nov 2022 09:47:07 +0800 Subject: [PATCH 011/112] add can, stt to algorithm_overview_en.md (#8328) --- doc/doc_ch/algorithm_overview.md | 30 +++++++++++++++++++++++++ doc/doc_en/algorithm_overview_en.md | 34 ++++++++++++++++++++++++++++- 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/doc/doc_ch/algorithm_overview.md b/doc/doc_ch/algorithm_overview.md index 7f6919c1..02a4cbad 100755 --- a/doc/doc_ch/algorithm_overview.md +++ b/doc/doc_ch/algorithm_overview.md @@ -3,6 +3,8 @@ - [1. 两阶段OCR算法](#1) - [1.1 文本检测算法](#11) - [1.2 文本识别算法](#12) + - [1.3 文本超分辨率算法](#13) + - [1.4 公式识别算法](#14) - [2. 端到端OCR算法](#2) - [3. 表格识别算法](#3) - [4. 关键信息抽取算法](#4) @@ -107,6 +109,34 @@ PaddleOCR将**持续新增**支持OCR领域前沿算法与模型,**欢迎广 |RobustScanner|ResNet31| 87.77% | rec_r31_robustscanner | [训练模型](https://paddleocr.bj.bcebos.com/contribution/rec_r31_robustscanner.tar)| |RFL|ResNetRFL| 88.63% | rec_resnet_rfl_att | [训练模型](https://paddleocr.bj.bcebos.com/contribution/rec_resnet_rfl_att_train.tar) | + + + +### 1.3 文本超分辨率算法 +已支持的文本超分辨率算法列表(戳链接获取使用教程): +- [x] [Text Gestalt](./algorithm_sr_gestalt.md) +- [x] [Text Telescope](./algorithm_sr_telescope.md) + +在TextZoom公开数据集上,算法效果如下: + +|模型|骨干网络|PSNR_Avg|SSIM_Avg|配置文件|下载链接| +|---|---|---|---|---|---| +|Text Gestalt|tsrn|19.28|0.6560| [configs/sr/sr_tsrn_transformer_strock.yml](../../configs/sr/sr_tsrn_transformer_strock.yml)|[训练模型](https://paddleocr.bj.bcebos.com/sr_tsrn_transformer_strock_train.tar)| +|Text Telescope|tbsrn|21.56|0.7411| [configs/sr/sr_telescope.yml](../../configs/sr/sr_telescope.yml)|[训练模型](https://paddleocr.bj.bcebos.com/contribution/sr_telescope_train.tar)| + + + +### 1.4 公式识别算法 + +已支持的公式识别算法列表(戳链接获取使用教程): +- [x] [CAN](./algorithm_rec_can.md.md) + +在CROHME手写公式数据集上,算法效果如下: + +|模型 |骨干网络|配置文件|ExpRate|下载链接| +| ----- | ----- | ----- | ----- | ----- | +|CAN|DenseNet|[rec_d28_can.yml](../../configs/rec/rec_d28_can.yml)|51.72%|[训练模型](https://paddleocr.bj.bcebos.com/contribution/rec_d28_can_train.tar)| + ## 2. 
端到端算法 diff --git a/doc/doc_en/algorithm_overview_en.md b/doc/doc_en/algorithm_overview_en.md index 309d074e..fad0fb8a 100755 --- a/doc/doc_en/algorithm_overview_en.md +++ b/doc/doc_en/algorithm_overview_en.md @@ -3,6 +3,8 @@ - [1. Two-stage OCR Algorithms](#1) - [1.1 Text Detection Algorithms](#11) - [1.2 Text Recognition Algorithms](#12) + - [1.3 Text Super-Resolution Algorithms](#13) + - [1.4 Formula Recognition Algorithm](#14) - [2. End-to-end OCR Algorithms](#2) - [3. Table Recognition Algorithms](#3) - [4. Key Information Extraction Algorithms](#4) @@ -104,6 +106,36 @@ Refer to [DTRB](https://arxiv.org/abs/1904.01906), the training and evaluation r |RobustScanner|ResNet31| 87.77% | rec_r31_robustscanner | [trained model](https://paddleocr.bj.bcebos.com/contribution/rec_r31_robustscanner.tar)| |RFL|ResNetRFL| 88.63% | rec_resnet_rfl_att | [trained model](https://paddleocr.bj.bcebos.com/contribution/rec_resnet_rfl_att_train.tar) | + + +### 1.3 Text Super-Resolution Algorithms + +Supported text super-resolution algorithms (Click the link to get the tutorial): +- [x] [Text Gestalt](./algorithm_sr_gestalt.md) +- [x] [Text Telescope](./algorithm_sr_telescope_en.md) + +On the TextZoom public dataset, the effect of the algorithm is as follows: + +|Model|Backbone|PSNR_Avg|SSIM_Avg|Config|Download link| +|---|---|---|---|---|---| +|Text Gestalt|tsrn|19.28|0.6560| [configs/sr/sr_tsrn_transformer_strock.yml](../../configs/sr/sr_tsrn_transformer_strock.yml)|[trained model](https://paddleocr.bj.bcebos.com/sr_tsrn_transformer_strock_train.tar)| +|Text Telescope|tbsrn|21.56|0.7411| [configs/sr/sr_telescope.yml](../../configs/sr/sr_telescope.yml)|[trained model](https://paddleocr.bj.bcebos.com/contribution/sr_telescope_train.tar)| + + + +### 1.4 Formula Recognition Algorithm + +Supported formula recognition algorithms (Click the link to get the tutorial): + +- [x] [CAN](./algorithm_rec_can_en.md) + +On the CROHME handwritten formula dataset, the effect of the algorithm is as
follows: + +|Model |Backbone|Config|ExpRate|Download link| +| ----- | ----- | ----- | ----- | ----- | +|CAN|DenseNet|[rec_d28_can.yml](../../configs/rec/rec_d28_can.yml)|51.72%|[trained model](https://paddleocr.bj.bcebos.com/contribution/rec_d28_can_train.tar)| + + ## 2. End-to-end OCR Algorithms @@ -122,7 +154,7 @@ On the PubTabNet dataset, the algorithm result is as follows: |Model|Backbone|Config|Acc|Download link| |---|---|---|---|---| -|TableMaster|TableResNetExtra|[configs/table/table_master.yml](../../configs/table/table_master.yml)|77.47%|[trained](https://paddleocr.bj.bcebos.com/ppstructure/models/tablemaster/table_structure_tablemaster_train.tar) / [inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/tablemaster/table_structure_tablemaster_infer.tar)| +|TableMaster|TableResNetExtra|[configs/table/table_master.yml](../../configs/table/table_master.yml)|77.47%|[trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/tablemaster/table_structure_tablemaster_train.tar) / [inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/tablemaster/table_structure_tablemaster_infer.tar)| -- Gitee From 6660e3b4955481364b26f00d7c2310e043c5ca64 Mon Sep 17 00:00:00 2001 From: Evezerest <50011306+Evezerest@users.noreply.github.com> Date: Wed, 16 Nov 2022 14:47:34 +0800 Subject: [PATCH 012/112] Update quickstart.md --- ppstructure/docs/quickstart.md | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/ppstructure/docs/quickstart.md b/ppstructure/docs/quickstart.md index 287b0d13..9909f795 100644 --- a/ppstructure/docs/quickstart.md +++ b/ppstructure/docs/quickstart.md @@ -104,19 +104,6 @@ paddleocr --image_dir=ppstructure/recovery/UnrealText.pdf --type=structure --rec 通过OCR技术: -版面恢复分为2种方法,详细介绍请参考:[版面恢复教程](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/ppstructure/recovery/README_ch.md): - -- PDF解析 -- OCR技术 - -通过PDF解析(只支持pdf格式的输入): - -```bash -paddleocr --image_dir=ppstructure/recovery/UnrealText.pdf --type=structure 
--recovery=true --use_pdf2docx_api=true -``` - -通过OCR技术: - ```bash # 中文测试图 paddleocr --image_dir=ppstructure/docs/table/1.png --type=structure --recovery=true -- Gitee From 44e60e81cc9a6e801f31080205734ff3069df675 Mon Sep 17 00:00:00 2001 From: zhoujun Date: Thu, 17 Nov 2022 15:54:54 +0800 Subject: [PATCH 013/112] cp of 8353 (#8354) * add can, stt to algorithm_overview_en.md * update table recognition finetune --- doc/doc_ch/table_recognition.md | 37 +++++++++++++++++++++++++++++- doc/doc_en/table_recognition_en.md | 37 +++++++++++++++++++++++++++++- 2 files changed, 72 insertions(+), 2 deletions(-) diff --git a/doc/doc_ch/table_recognition.md b/doc/doc_ch/table_recognition.md index 156ba80e..f09dedd0 100644 --- a/doc/doc_ch/table_recognition.md +++ b/doc/doc_ch/table_recognition.md @@ -14,6 +14,9 @@ - [2.5. 分布式训练](#25-分布式训练) - [2.6. 其他训练环境](#26-其他训练环境) - [2.7. 模型微调](#27-模型微调) + - [2.7.1 数据选择](#271-数据选择) + - [2.7.2 模型选择](#272-模型选择) + - [2.7.3 训练超参选择](#273-训练超参选择) - [3. 模型评估与预测](#3-模型评估与预测) - [3.1. 指标评估](#31-指标评估) - [3.2. 测试表格结构识别效果](#32-测试表格结构识别效果) @@ -219,7 +222,39 @@ DCU设备上运行需要设置环境变量 `export HIP_VISIBLE_DEVICES=0,1,2,3` ## 2.7. 
模型微调 -实际使用过程中,建议加载官方提供的预训练模型,在自己的数据集中进行微调,关于模型的微调方法,请参考:[模型微调教程](./finetune.md)。 +### 2.7.1 数据选择 + +数据量:建议至少准备2000张的表格识别数据集用于模型微调。 + +### 2.7.2 模型选择 + +建议选择SLANet模型(配置文件:[SLANet_ch.yml](../../configs/table/SLANet_ch.yml),预训练模型:[ch_ppstructure_mobile_v2.0_SLANet_train.tar](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar))进行微调,其精度与泛化性能是目前提供的最优中文表格预训练模型。 + +更多表格识别模型,请参考[PP-Structure 系列模型库](../../ppstructure/docs/models_list.md)。 + +### 2.7.3 训练超参选择 + +在模型微调的时候,最重要的超参就是预训练模型路径`pretrained_model`, 学习率`learning_rate`,部分配置文件如下所示。 + +```yaml +Global: + pretrained_model: ./ch_ppstructure_mobile_v2.0_SLANet_train/best_accuracy.pdparams # 预训练模型路径 +Optimizer: + lr: + name: Cosine + learning_rate: 0.001 # + warmup_epoch: 0 + regularizer: + name: 'L2' + factor: 0 +``` + +上述配置文件中,首先需要将`pretrained_model`字段指定为`best_accuracy.pdparams`文件路径。 + +PaddleOCR提供的配置文件是在4卡训练(相当于总的batch size是`4*48=192`)、且没有加载预训练模型情况下的配置文件,因此您的场景中,学习率与总的batch size需要对应线性调整,例如 + +* 如果您的场景中是单卡训练,单卡batch_size=48,则总的batch_size=48,建议将学习率调整为`0.00025`左右。 +* 如果您的场景中是单卡训练,由于显存限制,只能设置单卡batch_size=32,则总的batch_size=32,建议将学习率调整为`0.00017`左右。 # 3. 模型评估与预测 diff --git a/doc/doc_en/table_recognition_en.md b/doc/doc_en/table_recognition_en.md index cff2933d..d79d9893 100644 --- a/doc/doc_en/table_recognition_en.md +++ b/doc/doc_en/table_recognition_en.md @@ -14,6 +14,9 @@ This article provides a full-process guide for the PaddleOCR table recognition m - [2.5. Distributed Training](#25-distributed-training) - [2.6. Training on other platform(Windows/macOS/Linux DCU)](#26-training-on-other-platformwindowsmacoslinux-dcu) - [2.7. Fine-tuning](#27-fine-tuning) + - [2.7.1 Dataset](#271-dataset) + - [2.7.2 model selection](#272-model-selection) + - [2.7.3 Training hyperparameter selection](#273-training-hyperparameter-selection) - [3. Evaluation and Test](#3-evaluation-and-test) - [3.1. Evaluation](#31-evaluation) - [3.2. 
Test table structure recognition effect](#32-test-table-structure-recognition-effect) @@ -226,8 +229,40 @@ Running on a DCU device requires setting the environment variable `export HIP_VI ## 2.7. Fine-tuning -In the actual use process, it is recommended to load the officially provided pre-training model and fine-tune it in your own data set. For the fine-tuning method of the table recognition model, please refer to: [Model fine-tuning tutorial](./finetune.md). +### 2.7.1 Dataset + +Data volume: it is recommended to prepare a table recognition dataset of at least 2000 images for model fine-tuning. + +### 2.7.2 model selection + +It is recommended to choose the SLANet model (configuration file: [SLANet_ch.yml](../../configs/table/SLANet_ch.yml), pre-training model: [ch_ppstructure_mobile_v2.0_SLANet_train.tar](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar)) for fine-tuning; in terms of accuracy and generalization, it is the best Chinese table pre-training model currently available. + +For more table recognition models, please refer to [PP-Structure Series Model Library](../../ppstructure/docs/models_list.md). + +### 2.7.3 Training hyperparameter selection + +When fine-tuning the model, the most important hyperparameters are the pretrained model path `pretrained_model` and the learning rate `learning_rate`. Part of the configuration file is shown below. + +```yaml +Global: + pretrained_model: ./ch_ppstructure_mobile_v2.0_SLANet_train/best_accuracy.pdparams # Pre-trained model path +Optimizer: + lr: + name: Cosine + learning_rate: 0.001 # + warmup_epoch: 0 + regularizer: + name: 'L2' + factor: 0 +``` + +In the above configuration file, you first need to specify the `pretrained_model` field as the `best_accuracy.pdparams` file path. + +The configuration file provided by PaddleOCR is for 4-card training (equivalent to a total batch size of `4*48=192`) and no pre-trained model is loaded.
Therefore, in your scenario, the learning rate needs to be adjusted linearly with the total batch size, for example: + +* If you train on a single card with batch_size=48, the total batch_size is 48, and it is recommended to adjust the learning rate to about `0.00025`. +* If you train on a single card and, due to memory limitations, can only set batch_size=32, the total batch_size is 32, and it is recommended to adjust the learning rate to about `0.00017`. # 3. Evaluation and Test -- Gitee From 7b61b8f3d0e674d1e8d6b5b5eeda117a0c30a98c Mon Sep 17 00:00:00 2001 From: user1018 <614803115@qq.com> Date: Fri, 18 Nov 2022 10:14:26 +0800 Subject: [PATCH 014/112] update recovery (#8358) --- ppstructure/predict_system.py | 4 +++- ppstructure/recovery/recovery_to_doc.py | 2 +- ppstructure/recovery/requirements.txt | 1 - ppstructure/recovery/table_process.py | 2 -- requirements.txt | 2 +- 5 files changed, 5 insertions(+), 6 deletions(-) diff --git a/ppstructure/predict_system.py b/ppstructure/predict_system.py index bb061c99..b32b7062 100644 --- a/ppstructure/predict_system.py +++ b/ppstructure/predict_system.py @@ -229,7 +229,9 @@ def main(args): if args.recovery and args.use_pdf2docx_api and flag_pdf: from pdf2docx.converter import Converter - docx_file = os.path.join(args.output, '{}.docx'.format(img_name)) + os.makedirs(args.output, exist_ok=True) + docx_file = os.path.join(args.output, + '{}_api.docx'.format(img_name)) cv = Converter(image_file) cv.convert(docx_file) cv.close() diff --git a/ppstructure/recovery/recovery_to_doc.py b/ppstructure/recovery/recovery_to_doc.py index 1d8f8d9d..05018120 100644 --- a/ppstructure/recovery/recovery_to_doc.py +++ b/ppstructure/recovery/recovery_to_doc.py @@ -73,7 +73,7 @@ def convert_info_docx(img, res, save_folder, img_name): text_run.font.size = shared.Pt(10) # save to docx - docx_path =
os.path.join(save_folder, '{}_ocr.docx'.format(img_name)) doc.save(docx_path) logger.info('docx save to {}'.format(docx_path)) diff --git a/ppstructure/recovery/requirements.txt b/ppstructure/recovery/requirements.txt index ec08f9d0..761b9d7c 100644 --- a/ppstructure/recovery/requirements.txt +++ b/ppstructure/recovery/requirements.txt @@ -1,5 +1,4 @@ python-docx -PyMuPDF==1.19.0 beautifulsoup4 fonttools>=4.24.0 fire>=0.3.0 diff --git a/ppstructure/recovery/table_process.py b/ppstructure/recovery/table_process.py index 982e6b76..77a6ef76 100644 --- a/ppstructure/recovery/table_process.py +++ b/ppstructure/recovery/table_process.py @@ -278,8 +278,6 @@ class HtmlToDocx(HTMLParser): cell_col += colspan cell_row += 1 - doc.save('1.docx') - def handle_data(self, data): if self.skip: return diff --git a/requirements.txt b/requirements.txt index 8c5b12f8..b6dd6e57 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,4 +16,4 @@ openpyxl attrdict Polygon3 lanms-neo==1.0.2 -PyMuPDF==1.19.0 \ No newline at end of file +PyMuPDF<1.21.0 \ No newline at end of file -- Gitee From d1a3c1bc1ff71da33f0ef21229740aa443e25347 Mon Sep 17 00:00:00 2001 From: Evezerest <50011306+Evezerest@users.noreply.github.com> Date: Fri, 18 Nov 2022 10:55:57 +0800 Subject: [PATCH 015/112] Update ocr_book_en.md --- doc/doc_en/ocr_book_en.md | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/doc/doc_en/ocr_book_en.md b/doc/doc_en/ocr_book_en.md index b0455fe6..63162be5 100644 --- a/doc/doc_en/ocr_book_en.md +++ b/doc/doc_en/ocr_book_en.md @@ -1,6 +1,6 @@ # E-book: *Dive Into OCR* -"Dive Into OCR" is a textbook that combines OCR theory and practice, written by the PaddleOCR team, Chen Zhineng, a Pre-tenure Professor at Fudan University, Huang Wenhui, a senior expert in the field of vision at China Mobile Research Institute, and other industry-university-research colleagues, as well as OCR developers. 
The main features are as follows: +"Dive Into OCR" is a textbook that combines OCR theory and practice, written by the PaddleOCR community. The main features are as follows: - OCR full-stack technology covering text detection, recognition and document analysis - Closely integrate theory and practice, cross the code implementation gap, and supporting instructional videos @@ -8,6 +8,10 @@ ## Structure +
+ +
+ - The first part is the preliminary knowledge of the book, including the knowledge index and resource links needed in the process of positioning and using the book content of the book - The second part is chapters 4-8 of the book, which introduce the concepts, applications, and industry practices related to the detection and identification capabilities of the OCR engine. In the "Introduction to OCR Technology", the application scenarios and challenges of OCR, the basic concepts of technology, and the pain points in industrial applications are comprehensively explained. Then, in the two chapters of "Text Detection" and "Text Recognition", the two basic tasks of OCR are introduced. In each chapter, an algorithm is accompanied by a detailed explanation of the code and practical exercises. Chapters 6 and 7 are a detailed introduction to the PP-OCR series model, PP-OCR is a set of OCR systems for industrial applications, on the basis of the basic detection and identification model, after a series of optimization strategies to achieve the general field of industrial SOTA model, while opening up a variety of predictive deployment solutions, enabling enterprises to quickly land OCR applications. 
@@ -16,6 +20,11 @@ ## Address -- [E-book: *Dive Into OCR* (link generating)]() -- [Jupyter notebook](../../notebook/notebook_en/) -- [videos (Chinese only)](https://aistudio.baidu.com/aistudio/education/group/info/25207) +- [E-book: *Dive Into OCR* (PDF)](https://paddleocr.bj.bcebos.com/ebook/Dive_into_OCR.pdf) +- [Notebook (.ipynb)](https://github.com/PaddleOCR-Community/Dive-into-OCR) +- [Videos (Chinese only)](https://aistudio.baidu.com/aistudio/education/group/info/25207) + + +trackgit-views + + -- Gitee From 0dc155669907f6b49354a68a3b83c833b9aadb35 Mon Sep 17 00:00:00 2001 From: Evezerest <50011306+Evezerest@users.noreply.github.com> Date: Mon, 21 Nov 2022 20:45:24 +0800 Subject: [PATCH 016/112] Fix PPOCRLabel bugs, release PPOCRLabel 2.1.3 (#8402) * Fix PPOCRLabel bugs, release PPOCRLabel 2.1.3 Fix PPOCRLabel bugs due to the upgrade of paddleocr, release PPOCRLabel 2.1.3 * Update PPOCRLabel's requirements Update PPOCRLabel's requirements * Update PPOCRLabel README Update PPOCRLabel README * Update PPOCRLabel Update PPOCRLabel --- PPOCRLabel/PPOCRLabel.py | 13 +++++++------ PPOCRLabel/README.md | 4 ++-- PPOCRLabel/README_ch.md | 4 ++-- PPOCRLabel/libs/autoDialog.py | 2 +- PPOCRLabel/requirements.txt | 2 +- PPOCRLabel/setup.py | 4 ++-- 6 files changed, 15 insertions(+), 14 deletions(-) diff --git a/PPOCRLabel/PPOCRLabel.py b/PPOCRLabel/PPOCRLabel.py index 0a3ae1cb..6c8154d1 100644 --- a/PPOCRLabel/PPOCRLabel.py +++ b/PPOCRLabel/PPOCRLabel.py @@ -1617,8 +1617,9 @@ class MainWindow(QMainWindow): key_cls = 'None' if not self.kie_mode else box.get('key_cls', 'None') shapes.append((box['transcription'], box['points'], None, key_cls, box.get('difficult', False))) - self.loadLabels(shapes) - self.canvas.verified = False + if shapes != []: + self.loadLabels(shapes) + self.canvas.verified = False def validFilestate(self, filePath): if filePath not in self.fileStatedict.keys(): @@ -2203,7 +2204,7 @@ class MainWindow(QMainWindow): msg = 'Can not recognise the detection 
box in ' + self.filePath + '. Please change manually' QMessageBox.information(self, "Information", msg) return - result = self.ocr.ocr(img_crop, cls=True, det=False) + result = self.ocr.ocr(img_crop, cls=True, det=False)[0] if result[0][0] != '': if shape.line_color == DEFAULT_LOCK_COLOR: shape.label = result[0][0] @@ -2264,7 +2265,7 @@ class MainWindow(QMainWindow): msg = 'Can not recognise the detection box in ' + self.filePath + '. Please change manually' QMessageBox.information(self, "Information", msg) return - result = self.ocr.ocr(img_crop, cls=True, det=False) + result = self.ocr.ocr(img_crop, cls=True, det=False)[0] if result[0][0] != '': result.insert(0, box) print('result in reRec is ', result) @@ -2415,12 +2416,12 @@ class MainWindow(QMainWindow): # merge the text result in the cell texts = '' probs = 0. # the probability of the cell is avgerage prob of every text box in the cell - bboxes = self.ocr.ocr(img_crop, det=True, rec=False, cls=False) + bboxes = self.ocr.ocr(img_crop, det=True, rec=False, cls=False)[0] if len(bboxes) > 0: bboxes.reverse() # top row text at first for _bbox in bboxes: patch = get_rotate_crop_image(img_crop, np.array(_bbox, np.float32)) - rec_res = self.ocr.ocr(patch, det=False, rec=True, cls=False) + rec_res = self.ocr.ocr(patch, det=False, rec=True, cls=False)[0] text = rec_res[0][0] if text != '': texts += text + ('' if text[0].isalpha() else ' ') # add space between english word diff --git a/PPOCRLabel/README.md b/PPOCRLabel/README.md index 9c483e1f..ec933a83 100644 --- a/PPOCRLabel/README.md +++ b/PPOCRLabel/README.md @@ -103,11 +103,11 @@ python PPOCRLabel.py --kie True # [KIE mode] for [detection + recognition + keyw ``` #### 1.2.3 Build and Install the Whl Package Locally -Compile and install a new whl package, where 1.0.2 is the version number, you can specify the new version in 'setup.py'. +Compile and install a new whl package, where 0.0.0 is the version number, you can specify the new version in 'setup.py'. 
```bash cd ./PPOCRLabel python3 setup.py bdist_wheel -pip3 install dist/PPOCRLabel-2.1.2-py2.py3-none-any.whl +pip3 install dist/PPOCRLabel-0.0.0-py2.py3-none-any.whl ``` diff --git a/PPOCRLabel/README_ch.md b/PPOCRLabel/README_ch.md index afe1a08f..5359afc4 100644 --- a/PPOCRLabel/README_ch.md +++ b/PPOCRLabel/README_ch.md @@ -101,12 +101,12 @@ python PPOCRLabel.py --lang ch #### 1.2.3 本地构建whl包并安装 -编译与安装新的whl包,其中1.0.2为版本号,可在 `setup.py` 中指定新版本。 +编译与安装新的whl包,其中0.0.0为版本号,可在 `setup.py` 中指定新版本。 ```bash cd ./PPOCRLabel python3 setup.py bdist_wheel -pip3 install dist/PPOCRLabel-2.1.2-py2.py3-none-any.whl -i https://mirror.baidu.com/pypi/simple +pip3 install dist/PPOCRLabel-0.0.0-py2.py3-none-any.whl -i https://mirror.baidu.com/pypi/simple ``` diff --git a/PPOCRLabel/libs/autoDialog.py b/PPOCRLabel/libs/autoDialog.py index 189a590d..55636eec 100644 --- a/PPOCRLabel/libs/autoDialog.py +++ b/PPOCRLabel/libs/autoDialog.py @@ -40,7 +40,7 @@ class Worker(QThread): if self.model == 'paddle': h, w, _ = cv2.imdecode(np.fromfile(Imgpath, dtype=np.uint8), 1).shape if h > 32 and w > 32: - self.result_dic = self.ocr.ocr(Imgpath, cls=True, det=True) + self.result_dic = self.ocr.ocr(Imgpath, cls=True, det=True)[0] else: print('The size of', Imgpath, 'is too small to be recognised') self.result_dic = None diff --git a/PPOCRLabel/requirements.txt b/PPOCRLabel/requirements.txt index fd42a2de..a10b3453 100644 --- a/PPOCRLabel/requirements.txt +++ b/PPOCRLabel/requirements.txt @@ -1,3 +1,3 @@ pyqt5 -paddleocr==2.6.0.0 +paddleocr xlrd==1.2.0 diff --git a/PPOCRLabel/setup.py b/PPOCRLabel/setup.py index a112df54..9770b632 100644 --- a/PPOCRLabel/setup.py +++ b/PPOCRLabel/setup.py @@ -33,10 +33,10 @@ setup( package_dir={'PPOCRLabel': ''}, include_package_data=True, entry_points={"console_scripts": ["PPOCRLabel= PPOCRLabel.PPOCRLabel:main"]}, - version='2.1.2', + version='2.1.3', install_requires=requirements, license='Apache License 2.0', - description='PPOCRLabel is a semi-automatic graphic 
annotation tool suitable for OCR field, with built-in PPOCR model to automatically detect and re-recognize data. It is written in python3 and pyqt5, supporting rectangular box annotation and four-point annotation modes. Annotations can be directly used for the training of PPOCR detection and recognition models', + description='PPOCRLabelv2 is a semi-automatic graphic annotation tool suitable for OCR field, with built-in PP-OCR model to automatically detect and re-recognize data. It is written in Python3 and PyQT5, supporting rectangular box, table, irregular text and key information annotation modes. Annotations can be directly used for the training of PP-OCR detection and recognition models.', long_description=readme(), long_description_content_type='text/markdown', url='https://github.com/PaddlePaddle/PaddleOCR', -- Gitee From 9873d47514b1824d801ed9ab430420ded2d5ce01 Mon Sep 17 00:00:00 2001 From: Wang Xin Date: Tue, 22 Nov 2022 11:40:02 +0800 Subject: [PATCH 017/112] Update readme.md (#8388) fix wrong link of Model inference --- deploy/cpp_infer/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/cpp_infer/readme.md b/deploy/cpp_infer/readme.md index d176ff98..8c6c5122 100644 --- a/deploy/cpp_infer/readme.md +++ b/deploy/cpp_infer/readme.md @@ -158,7 +158,7 @@ build/paddle_inference_install_dir/ ### 2.1 Export the inference model -* You can refer to [Model inference](../../doc/doc_ch/inference.md) and export the inference model. After the model is exported, assuming it is placed in the `inference` directory, the directory structure is as follows. +* You can refer to [Model inference](../../doc/doc_en/inference_en.md) and export the inference model. After the model is exported, assuming it is placed in the `inference` directory, the directory structure is as follows. 
``` inference/ -- Gitee From 4dd3d01d75501ac3312fe0c8d4884d54d402c15d Mon Sep 17 00:00:00 2001 From: Evezerest <50011306+Evezerest@users.noreply.github.com> Date: Fri, 25 Nov 2022 16:19:30 +0800 Subject: [PATCH 018/112] Update README.md --- deploy/paddlejs/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/paddlejs/README.md b/deploy/paddlejs/README.md index eef39b6c..a73a5af2 100644 --- a/deploy/paddlejs/README.md +++ b/deploy/paddlejs/README.md @@ -5,7 +5,7 @@ English| [简体中文](README_ch.md) [Paddle.js](https://github.com/PaddlePaddle/Paddle.js) is a web project for Baidu PaddlePaddle, which is an open source deep learning framework running in the browser. Paddle.js can either load a pre-trained model, or transforming a model from paddle-hub with model transforming tools provided by Paddle.js. It could run in every browser with WebGL/WebGPU/WebAssembly supported. It could also run in Baidu Smartprogram and wechat miniprogram. ## Web Demo -Run OCR demo in browser refer to [tutorial](https://github.com/PaddlePaddle/FastDeploy/blob/develop/examples/application/js/WebDemo.md). +Run OCR demo in browser refer to [tutorial](https://github.com/PaddlePaddle/FastDeploy/blob/develop/examples/application/js/WebDemo_en.md). 
|demo|web demo dicrctory|visualization| |-|-|-| -- Gitee From 61fabdc09bdc999f5f7b33b8adffc0c965821f4b Mon Sep 17 00:00:00 2001 From: Evezerest <50011306+Evezerest@users.noreply.github.com> Date: Fri, 25 Nov 2022 16:25:23 +0800 Subject: [PATCH 019/112] Update README (#8444) Update README --- README.md | 9 ++++---- README_ch.md | 28 ++++++++----------------- doc/doc_en/algorithm_sr_telescope_en.md | 2 +- 3 files changed, 14 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 8e869f6d..1a73daf5 100644 --- a/README.md +++ b/README.md @@ -26,12 +26,10 @@ PaddleOCR aims to create multilingual, awesome, leading, and practical OCR tools ## 📣 Recent updates -- 💥 **Live Preview: Oct 24 - Oct 26, China Standard Time, 20:30**, Engineers@PaddleOCR will show PP-StructureV2 optimization strategy for 3 days. - - Scan the QR code below using WeChat, follow the PaddlePaddle official account and fill out the questionnaire to join the WeChat group, get the live link and 20G OCR learning materials (including PDF2Word application, 10 models in vertical scenarios, etc.) +- 🔨**2022.11 Add implementation of [4 cutting-edge algorithms](doc/doc_en/algorithm_overview_en.md)**: Text Detection [DRRG](doc/doc_en/algorithm_det_drrg_en.md), Text Recognition [RFL](./doc/doc_en/algorithm_rec_rfl_en.md), Image Super-Resolution [Text Telescope](doc/doc_en/algorithm_sr_telescope_en.md), Handwritten Mathematical Expression Recognition [CAN](doc/doc_en/algorithm_rec_can_en.md) +- **2022.10 Release [optimized JS version PP-OCRv3 model](./deploy/paddlejs/README.md)** with 4.3M model size, 8x faster inference time, and a ready-to-use web demo +- 💥 **Live Playback: Introduction to PP-StructureV2 optimization strategy**. Scan [the QR code below](#Community) using WeChat, follow the PaddlePaddle official account and fill out the questionnaire to join the WeChat group, get the live link and 20G OCR learning materials (including PDF2Word application, 10 models in vertical scenarios, etc.) -
- -
- **🔥2022.8.24 Release PaddleOCR [release/2.6](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.6)** - Release [PP-StructureV2](./ppstructure/),with functions and performance fully upgraded, adapted to Chinese scenes, and new support for [Layout Recovery](./ppstructure/recovery) and **one line command to convert PDF to Word**; @@ -74,6 +72,7 @@ PaddleOCR support a variety of cutting-edge algorithms related to OCR, and devel - [Dive Into OCR ](./doc/doc_en/ocr_book_en.md) + ## 👫 Community - For international developers, we regard [PaddleOCR Discussions](https://github.com/PaddlePaddle/PaddleOCR/discussions) as our international community platform. All ideas and questions can be discussed here in English. diff --git a/README_ch.md b/README_ch.md index 5fec27bd..3670c0dd 100755 --- a/README_ch.md +++ b/README_ch.md @@ -27,27 +27,17 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力 ## 📣 近期更新 -- **💥 直播预告:10.24-10.26日每晚8点半**,PaddleOCR研发团队详解PP-StructureV2优化策略。微信扫描下方二维码,关注公众号并填写问卷后进入官方交流群,获取直播链接与20G重磅OCR学习大礼包(内含PDF转Word应用程序、10种垂类模型、《动手学OCR》电子书等) +- 🔨**2022.11 新增实现[4种前沿算法](doc/doc_ch/algorithm_overview.md)**:文本检测 [DRRG](doc/doc_ch/algorithm_det_drrg.md), 文本识别 [RFL](doc/doc_ch/algorithm_rec_rfl.md), 文本超分[Text Telescope](doc/doc_ch/algorithm_sr_telescope.md),公式识别[CAN](doc/doc_ch/algorithm_rec_can.md.md) +- **2022.10 优化[JS版PP-OCRv3模型](./deploy/paddlejs/README_ch.md)**:模型大小仅4.3M,预测速度提升8倍,配套web demo开箱即用 +- **💥 直播回放:PaddleOCR研发团队详解PP-StructureV2优化策略**。微信扫描[下方二维码](#开源社区),关注公众号并填写问卷后进入官方交流群,获取直播回放链接与20G重磅OCR学习大礼包(内含PDF转Word应用程序、10种垂类模型、《动手学OCR》电子书等) -
- -
- **🔥2022.8.24 发布 PaddleOCR [release/2.6](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.6)** - 发布[PP-StructureV2](./ppstructure/README_ch.md),系统功能性能全面升级,适配中文场景,新增支持[版面复原](./ppstructure/recovery/README_ch.md),支持**一行命令完成PDF转Word**; - [版面分析](./ppstructure/layout/README_ch.md)模型优化:模型存储减少95%,速度提升11倍,平均CPU耗时仅需41ms; - [表格识别](./ppstructure/table/README_ch.md)模型优化:设计3大优化策略,预测耗时不变情况下,模型精度提升6%; - [关键信息抽取](./ppstructure/kie/README_ch.md)模型优化:设计视觉无关模型结构,语义实体识别精度提升2.8%,关系抽取精度提升9.1%。 - -- **🔥2022.8 发布 [OCR场景应用集合](./applications)** - - - 包含数码管、液晶屏、车牌、高精度SVTR模型、手写体识别等**9个垂类模型**,覆盖通用,制造、金融、交通行业的主要OCR垂类应用。 - - -- **2022.8 新增实现[8种前沿算法](doc/doc_ch/algorithm_overview.md)** - - 文本检测:[FCENet](doc/doc_ch/algorithm_det_fcenet.md), [DB++](doc/doc_ch/algorithm_det_db.md) - - 文本识别:[ViTSTR](doc/doc_ch/algorithm_rec_vitstr.md), [ABINet](doc/doc_ch/algorithm_rec_abinet.md), [VisionLAN](doc/doc_ch/algorithm_rec_visionlan.md), [SPIN](doc/doc_ch/algorithm_rec_spin.md), [RobustScanner](doc/doc_ch/algorithm_rec_robustscanner.md) - - 表格识别:[TableMaster](doc/doc_ch/algorithm_table_master.md) +- **2022.8 发布 [OCR场景应用集合](./applications)**:包含数码管、液晶屏、车牌、高精度SVTR模型、手写体识别等**9个垂类模型**,覆盖通用,制造、金融、交通行业的主要OCR垂类应用。 - **2022.5.9 发布 PaddleOCR [release/2.5](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.5)** @@ -79,23 +69,23 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力 ## 📚《动手学OCR》电子书 - [《动手学OCR》电子书](./doc/doc_ch/ocr_book.md) - + ## 👫 开源社区 - **📑项目合作:** 如果您是企业开发者且有明确的OCR垂类应用需求,填写[问卷](https://paddle.wjx.cn/vj/QwF7GKw.aspx)后可免费与官方团队展开不同层次的合作。 -- **👫加入社区:** 微信扫描二维码并填写问卷之后,加入交流群领取20G重磅OCR学习大礼包 - - **包括《动手学OCR》电子书** ,配套讲解视频和notebook项目;PaddleOCR历次发版直播课视频; +- **👫加入社区:** **微信扫描二维码并填写问卷之后,加入交流群领取20G重磅OCR学习大礼包** + - **包括《动手学OCR》电子书** ,配套讲解视频和notebook项目;**PaddleOCR历次发版直播课回放链接**; - **OCR场景应用模型集合:** 包含数码管、液晶屏、车牌、高精度SVTR模型、手写体识别等垂类模型,覆盖通用,制造、金融、交通行业的主要OCR垂类应用。 - PDF2Word应用程序;OCR社区优秀开发者项目分享视频。 - 
**🏅️社区项目**:[社区项目](./doc/doc_ch/thirdparty.md)文档中包含了社区用户**使用PaddleOCR开发的各种工具、应用**以及**为PaddleOCR贡献的功能、优化的文档与代码**等,是官方为社区开发者打造的荣誉墙,也是帮助优质项目宣传的广播站。 - **🎁社区常规赛**:社区常规赛是面向OCR开发者的积分赛事,覆盖文档、代码、模型和应用四大类型,以季度为单位评选并发放奖励,赛题详情与报名方法可参考[链接](https://github.com/PaddlePaddle/PaddleOCR/issues/4982)。
- + +

PaddleOCR官方交流群二维码

- ## 🛠️ PP-OCR系列模型列表(更新中) diff --git a/doc/doc_en/algorithm_sr_telescope_en.md b/doc/doc_en/algorithm_sr_telescope_en.md index 9acb5243..334b58b6 100644 --- a/doc/doc_en/algorithm_sr_telescope_en.md +++ b/doc/doc_en/algorithm_sr_telescope_en.md @@ -27,7 +27,7 @@ Paper: Referring to the [FudanOCR](https://github.com/FudanVI/FudanOCR/tree/main/scene-text-telescope) data download instructions, the effect of the super-score algorithm on the TextZoom test set is as follows: |Model|Backbone|config|Acc|Download link| -|---|---|---|---|---|---| +|---|---|---|---|---| |Text Gestalt|tsrn|21.56|0.7411| [configs/sr/sr_telescope.yml](../../configs/sr/sr_telescope.yml)|[train model](https://paddleocr.bj.bcebos.com/contribution/sr_telescope_train.tar)| The [TextZoom dataset](https://paddleocr.bj.bcebos.com/dataset/TextZoom.tar) comes from two superfraction data sets, RealSR and SR-RAW, both of which contain LR-HR pairs. TextZoom has 17367 pairs of training data and 4373 pairs of test data. -- Gitee From 119b5e9bed887ac4889bb66c569ab4c62e9cc029 Mon Sep 17 00:00:00 2001 From: Evezerest <50011306+Evezerest@users.noreply.github.com> Date: Fri, 25 Nov 2022 16:38:43 +0800 Subject: [PATCH 020/112] Update README.md --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1a73daf5..8af32c87 100644 --- a/README.md +++ b/README.md @@ -27,8 +27,9 @@ PaddleOCR aims to create multilingual, awesome, leading, and practical OCR tools ## 📣 Recent updates - 🔨**2022.11 Add implementation of [4 cutting-edge algorithms](doc/doc_ch/algorithm_overview.md)**:Text Detection [DRRG](doc/doc_en/algorithm_det_drrg_en.md), Text Recognition [RFL](./doc/doc_en/algorithm_rec_rfl_en.md), Image Super-Resolution [Text Telescope](doc/doc_en/algorithm_sr_telescope_en.md),Handwrittem Mathematical Expression Recognition [CAN](doc/doc_en/algorithm_rec_can_en.md) -- **2022.10 release [optimized JS version PP-OCRv3 model](./deploy/paddlejs/README.md)** with 4.3M model 
size, 8x faster inference time, and a ready-to-use web demo -- 💥 **Live Playback: Introduction to PP-StructureV2 optimization strategy **. Scan [the QR code below](#Community) using WeChat, follow the PaddlePaddle official account and fill out the questionnaire to join the WeChat group, get the live link and 20G OCR learning materials (including PDF2Word application, 10 models in vertical scenarios, etc.) +- **2022.10 Release [optimized JS version PP-OCRv3 model](./deploy/paddlejs/README.md)** with 4.3M model size, 8x faster inference time, and a ready-to-use web demo + +- 💥 **Live Playback: Introduction to PP-StructureV2 optimization strategy**. Scan [the QR code below](#Community) using WeChat, follow the PaddlePaddle official account and fill out the questionnaire to join the WeChat group, get the live link and 20G OCR learning materials (including PDF2Word application, 10 models in vertical scenarios, etc.) - **🔥2022.8.24 Release PaddleOCR [release/2.6](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.6)** -- Gitee From 6e4ba8dd0fb827c6171dd39d30ee477ed9564b80 Mon Sep 17 00:00:00 2001 From: Evezerest <50011306+Evezerest@users.noreply.github.com> Date: Fri, 25 Nov 2022 16:41:54 +0800 Subject: [PATCH 021/112] Update README_ch.md --- README_ch.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README_ch.md b/README_ch.md index 3670c0dd..759934a8 100755 --- a/README_ch.md +++ b/README_ch.md @@ -31,14 +31,16 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力 - **2022.10 优化[JS版PP-OCRv3模型](./deploy/paddlejs/README_ch.md)**:模型大小仅4.3M,预测速度提升8倍,配套web demo开箱即用 - **💥 直播回放:PaddleOCR研发团队详解PP-StructureV2优化策略**。微信扫描[下方二维码](#开源社区),关注公众号并填写问卷后进入官方交流群,获取直播回放链接与20G重磅OCR学习大礼包(内含PDF转Word应用程序、10种垂类模型、《动手学OCR》电子书等) - - **🔥2022.8.24 发布 PaddleOCR [release/2.6](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.6)** - 发布[PP-StructureV2](./ppstructure/README_ch.md),系统功能性能全面升级,适配中文场景,新增支持[版面复原](./ppstructure/recovery/README_ch.md),支持**一行命令完成PDF转Word**; - 
[版面分析](./ppstructure/layout/README_ch.md)模型优化:模型存储减少95%,速度提升11倍,平均CPU耗时仅需41ms; - [表格识别](./ppstructure/table/README_ch.md)模型优化:设计3大优化策略,预测耗时不变情况下,模型精度提升6%; - [关键信息抽取](./ppstructure/kie/README_ch.md)模型优化:设计视觉无关模型结构,语义实体识别精度提升2.8%,关系抽取精度提升9.1%。 - **2022.8 发布 [OCR场景应用集合](./applications)**:包含数码管、液晶屏、车牌、高精度SVTR模型、手写体识别等**9个垂类模型**,覆盖通用,制造、金融、交通行业的主要OCR垂类应用。 - +- **2022.8 新增实现[8种前沿算法](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6rc/doc/doc_ch/algorithm_overview.md)** + - 文本检测:[FCENet](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6rc/doc/doc_ch/algorithm_det_fcenet.md), [DB++](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6rc/doc/doc_ch/algorithm_det_db.md) + - 文本识别:[ViTSTR](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6rc/doc/doc_ch/algorithm_rec_vitstr.md), [ABINet](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6rc/doc/doc_ch/algorithm_rec_abinet.md), [VisionLAN](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6rc/doc/doc_ch/algorithm_rec_visionlan.md), [SPIN](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6rc/doc/doc_ch/algorithm_rec_spin.md), [RobustScanner](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6rc/doc/doc_ch/algorithm_rec_robustscanner.md) + - 表格识别:[TableMaster](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6rc/doc/doc_ch/algorithm_table_master.md) - **2022.5.9 发布 PaddleOCR [release/2.5](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.5)** - 发布[PP-OCRv3](./doc/doc_ch/ppocr_introduction.md#pp-ocrv3),速度可比情况下,中文场景效果相比于PP-OCRv2再提升5%,英文场景提升11%,80语种多语言模型平均识别准确率提升5%以上; -- Gitee From 24a362d253bb9c19eb650c973d7fe202113b845a Mon Sep 17 00:00:00 2001 From: Evezerest <50011306+Evezerest@users.noreply.github.com> Date: Mon, 28 Nov 2022 16:53:16 +0800 Subject: [PATCH 022/112] Update README.md --- deploy/paddlejs/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/deploy/paddlejs/README.md b/deploy/paddlejs/README.md index a73a5af2..8eefc387 100644 --- 
a/deploy/paddlejs/README.md +++ b/deploy/paddlejs/README.md @@ -24,3 +24,7 @@ Run OCR demo in wechat miniprogram refer to [tutorial](https://github.com/Paddle
+ + +trackgit-views + -- Gitee From b9c17d69901045b302933108d91b55edba9a15df Mon Sep 17 00:00:00 2001 From: Evezerest <50011306+Evezerest@users.noreply.github.com> Date: Mon, 28 Nov 2022 16:53:53 +0800 Subject: [PATCH 023/112] Update README_ch.md --- deploy/paddlejs/README_ch.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/deploy/paddlejs/README_ch.md b/deploy/paddlejs/README_ch.md index 46666130..d6c0f33f 100644 --- a/deploy/paddlejs/README_ch.md +++ b/deploy/paddlejs/README_ch.md @@ -29,3 +29,7 @@
+ + +trackgit-views + -- Gitee From 7a9cfaad9d299668213eaa6a21f88cc750b686b5 Mon Sep 17 00:00:00 2001 From: zhoujun Date: Tue, 29 Nov 2022 10:06:42 +0800 Subject: [PATCH 024/112] fix re inference error (#8475) --- tools/infer_kie_token_ser_re.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/infer_kie_token_ser_re.py b/tools/infer_kie_token_ser_re.py index c4fa2c92..76120a91 100755 --- a/tools/infer_kie_token_ser_re.py +++ b/tools/infer_kie_token_ser_re.py @@ -81,7 +81,7 @@ def make_input(ser_inputs, ser_results): end.append(entity['end']) label.append(entities_labels[res['pred']]) - entities = np.full([max_seq_len + 1, 3], fill_value=-1) + entities = np.full([max_seq_len + 1, 3], fill_value=-1, dtype=np.int64) entities[0, 0] = len(start) entities[1:len(start) + 1, 0] = start entities[0, 1] = len(end) @@ -98,7 +98,7 @@ def make_input(ser_inputs, ser_results): head.append(i) tail.append(j) - relations = np.full([len(head) + 1, 2], fill_value=-1) + relations = np.full([len(head) + 1, 2], fill_value=-1, dtype=np.int64) relations[0, 0] = len(head) relations[1:len(head) + 1, 0] = head relations[0, 1] = len(tail) -- Gitee From 5b27fa5e418d5d6264fa54c8b26c27274846b091 Mon Sep 17 00:00:00 2001 From: MissPenguin Date: Wed, 30 Nov 2022 10:01:04 +0800 Subject: [PATCH 025/112] Update LICENSE --- LICENSE | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index 261eeb9e..5fe86943 100644 --- a/LICENSE +++ b/LICENSE @@ -1,3 +1,5 @@ +Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved + Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ @@ -186,7 +188,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright [yyyy] [name of copyright owner] + Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
-- Gitee From be800b27c43701e958f37897d6f0e322c2cdf45f Mon Sep 17 00:00:00 2001 From: Double_V Date: Wed, 30 Nov 2022 12:43:33 +0800 Subject: [PATCH 026/112] [bug] fix config (#8491) * fix PP-OCRv3 det train * fix doc * fix bug --- configs/det/det_res18_db_v2.0.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/configs/det/det_res18_db_v2.0.yml b/configs/det/det_res18_db_v2.0.yml index e983c221..85c574de 100644 --- a/configs/det/det_res18_db_v2.0.yml +++ b/configs/det/det_res18_db_v2.0.yml @@ -22,7 +22,6 @@ Architecture: Backbone: name: ResNet_vd layers: 18 - disable_se: True Neck: name: DBFPN out_channels: 256 -- Gitee From 0c3c48147ee95e8c55f5e00ec1c1cb23c4b25045 Mon Sep 17 00:00:00 2001 From: andyj <87074272+andyjpaddle@users.noreply.github.com> Date: Wed, 30 Nov 2022 19:41:47 +0800 Subject: [PATCH 027/112] add en doc (#8500) --- doc/doc_ch/finetune.md | 3 +- doc/doc_en/detection_en.md | 5 ++ doc/doc_en/finetune_en.md | 167 +++++++++++++++++++++++++++++++++++ doc/doc_en/recognition_en.md | 6 ++ 4 files changed, 179 insertions(+), 2 deletions(-) create mode 100644 doc/doc_en/finetune_en.md diff --git a/doc/doc_ch/finetune.md b/doc/doc_ch/finetune.md index 2aff0c65..f622e6f0 100644 --- a/doc/doc_ch/finetune.md +++ b/doc/doc_ch/finetune.md @@ -136,8 +136,7 @@ Train: ``` - -上述配置文件中,首先需要将`pretrained_model`字段指定为2.2章节中解压得到的`ch_PP-OCRv3_rec_train/best_accuracy.pdparams`文件路径。 +上述配置文件中,首先需要将`pretrained_model`字段指定为3.2章节中解压得到的`ch_PP-OCRv3_rec_train/best_accuracy.pdparams`文件路径。 PaddleOCR提供的配置文件是在8卡训练(相当于总的batch size是`8*128=1024`)、且没有加载预训练模型情况下的配置文件,因此您的场景中,学习率与总的batch size需要对应线性调整,例如: diff --git a/doc/doc_en/detection_en.md b/doc/doc_en/detection_en.md index c215e1a4..ab2e868c 100644 --- a/doc/doc_en/detection_en.md +++ b/doc/doc_en/detection_en.md @@ -13,6 +13,7 @@ This section uses the icdar2015 dataset as an example to introduce the training, * [2.5 Distributed Training](#25-distributed-training) * [2.6 Training with knowledge distillation](#26) * [2.7 Training on other 
platform(Windows/macOS/Linux DCU)](#27) + * [2.8 Fine-tuning](#28) - [3. Evaluation and Test](#3-evaluation-and-test) - [3.1 Evaluation](#31-evaluation) - [3.2 Test](#32-test) @@ -178,6 +179,10 @@ GPU mode is not supported, you need to set `use_gpu` to False in the configurati - Linux DCU Running on a DCU device requires setting the environment variable `export HIP_VISIBLE_DEVICES=0,1,2,3`, and the rest of the training and evaluation prediction commands are exactly the same as the Linux GPU. +### 2.8 Fine-tuning + +In actual use, it is recommended to load the official pre-trained model and fine-tune it in your own data set. For the fine-tuning method of the detection model, please refer to: [Model Fine-tuning Tutorial](./finetune_en.md). + ## 3. Evaluation and Test ### 3.1 Evaluation diff --git a/doc/doc_en/finetune_en.md b/doc/doc_en/finetune_en.md new file mode 100644 index 00000000..54be93f4 --- /dev/null +++ b/doc/doc_en/finetune_en.md @@ -0,0 +1,167 @@ +# Fine-tune + +## 1. background and meaning + +The PP-OCR series models provided by PaddleOCR have excellent performance in general scenarios and can solve detection and recognition problems in most cases. In vertical scenarios, if you want to obtain better model, you can further improve the accuracy of the PP-OCR series detection and recognition models through fine-tune. + +This article mainly introduces some precautions when fine-tuning the text detection and recognition model. Finally, you can obtain a text detection and recognition model with higher accuracy through model fine-tuning in your own scenarios. + +The core points of this article are as follows: + +1. The pre-trained model provided by PP-OCR has better generalization ability +2. Adding a small amount of real data (detection:>=500, recognition:>=5000) will greatly improve the detection and recognition effect of vertical scenes +3. 
When fine-tuning the model, adding real general scene data can further improve the model accuracy and generalization performance +4. In the text detection task, increasing the prediction shape of the image can further improve the detection effect of the smaller text area +5. When fine-tuning the model, it is necessary to properly adjust the hyperparameters (learning rate, batch size are the most important) to obtain a better fine-tuning effect. + +For more details, please refer to Chapter 2 and Chapter 3. + +## 2. Text detection model fine-tuning + +### 2.1 Dataset + +* Dataset: It is recommended to prepare a text detection dataset of at least 500 images for model fine-tuning. + +* Dataset annotation: single-line text annotation format, it is recommended that the labeled detection frame be consistent with the actual semantic content. For example, in the train ticket scene, the surname and first name may be far apart, but they belong to the same detection field semantically. Here, the entire name also needs to be marked as a detection frame. + +### 2.2 Model + +It is recommended to choose the PP-OCRv3 model (configuration file: [ch_PP-OCRv3_det_student.yml](../../configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_student.yml), pre-trained model: [ch_PP-OCRv3_det_distill_train.tar](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar)), whose accuracy and generalization performance are the best among the pre-trained models currently available. + +For more PP-OCR series models, please refer to [PP-OCR Series Model Library](./models_list_en.md). + +Note: When using the above pre-trained model, you need to use the `student.pdparams` file in the folder as the pre-trained model, that is, only use the student model.
+ + +### 2.3 Training hyperparameter + +When fine-tuning the model, the most important hyperparameters are the pre-trained model path `pretrained_model`, `learning_rate` and `batch_size`; some hyperparameters are as follows: + +```yaml +Global: + pretrained_model: ./ch_PP-OCRv3_det_distill_train/student.pdparams # pre-training model path +Optimizer: + lr: + name: Cosine + learning_rate: 0.001 # learning_rate + warmup_epoch: 2 + regularizer: + name: 'L2' + factor: 0 + +Train: + loader: + shuffle: True + drop_last: False + batch_size_per_card: 8 # single gpu batch size + num_workers: 4 +``` + +In the above configuration file, you need to specify the `pretrained_model` field as the `student.pdparams` file path. + +The configuration file provided by PaddleOCR is for 8-gpu training (equivalent to a total batch size of `8*8=64`) and no pre-trained model is loaded. Therefore, in your scenario, the learning rate and the total batch size need to be adjusted linearly, for example: + +* If your scenario is single-gpu training, single gpu batch_size=8, then the total batch_size=8, it is recommended to adjust the learning rate to about `1e-4`. +* If your scenario is for single-gpu training, due to memory limitations, you can only set batch_size=4 for a single gpu, and the total batch_size=4. It is recommended to adjust the learning rate to about `5e-5`. + +### 2.4 Prediction hyperparameter + +When exporting and inferring the trained model, you can further adjust the predicted image scale to improve the detection effect of small-area text. The following are some hyperparameters during DBNet inference, which can be adjusted appropriately to improve the effect.
+ +| hyperparameter | type | default | meaning | +| :--: | :--: | :--: | :--: | +| det_db_thresh | float | 0.3 | In the probability map output by DB, pixels with a score greater than the threshold will be considered as text pixels | +| det_db_box_thresh | float | 0.6 | When the average score of all pixels within the frame of the detection result is greater than the threshold, the result will be considered as a text area | +| det_db_unclip_ratio | float | 1.5 | The expansion coefficient of `Vatti clipping`, using this method to expand the text area | +| max_batch_size | int | 10 | batch size | +| use_dilation | bool | False | Whether to expand the segmentation results to obtain better detection results | +| det_db_score_mode | str | "fast" | DB's detection result score calculation method supports `fast` and `slow`. `fast` calculates the average score based on all pixels in the polygon’s circumscribed rectangle border, and `slow` calculates the average score based on all pixels in the original polygon. The calculation speed is relatively slower, but more accurate. | + + +For more information on inference methods, please refer to [Paddle Inference doc](./inference_ppocr_en.md). + + +## 3. Text recognition model fine-tuning + + +### 3.1 Dataset + +* Dataset: If the dictionary is not changed, it is recommended to prepare at least 5,000 text recognition datasets for model fine-tuning; if the dictionary is changed (not recommended), more quantities are required. + +* Data distribution: It is recommended that the distribution be as consistent as possible with the actual measurement scenario. If the actual scene contains a lot of short text, it is recommended to include more short text in the training data. If the actual scene has high requirements for the recognition effect of spaces, it is recommended to include more text content with spaces in the training data.
+ +* Data synthesis: In the case of some character recognition errors, it is recommended to obtain a batch of data for the specific characters, add it to the original dataset and use a small learning rate for fine-tuning. The ratio of original dataset to new dataset can be 10:1 to 5:1 to avoid overfitting of the model caused by too much data in a single scene. At the same time, try to balance the word frequency of the corpus to ensure that the frequency of common words will not be too low. + + Specific characters can be generated using the TextRenderer tool, for synthesis examples, please refer to [data synthesis](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6/applications/%E5%85%89%E5%8A%9F%E7%8E%87%E8%AE%A1%E6%95%B0%E7%A0%81%E7%AE%A1%E5%AD%97%E7%AC%A6%E8%AF%86%E5%88%AB/%E5%85%89%E5%8A%9F%E7%8E%87%E8%AE%A1%E6%95%B0%E7%A0%81%E7%AE%A1%E5%AD%97%E7%AC%A6%E8%AF%86%E5%88%AB.md#31-%E6%95%B0%E6%8D%AE%E5%87%86%E5%A4%87) + . The synthetic data corpus should come from real usage scenarios as much as possible, and keep the richness of fonts and backgrounds on the basis of being close to the real scene, which will help improve the model effect. + +* Common Chinese and English data: During training, common real data can be added to the training set (for example, in the fine-tuning scenario without changing the dictionary, it is recommended to add real data such as LSVT, RCTW, MTWI) to further improve the generalization performance of the model. + +### 3.2 Model + +It is recommended to choose the PP-OCRv3 model (configuration file: [ch_PP-OCRv3_rec_distillation.yml](../../configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml), pre-trained model: [ch_PP-OCRv3_rec_train.tar](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_train.tar)), whose accuracy and generalization performance are the best among the pre-trained models currently available.
+ +For more PP-OCR series models, please refer to [PP-OCR Series Model Library](./models_list_en.md). + + +### 3.3 Training hyperparameter + +Similar to text detection task fine-tuning, when fine-tuning the recognition model, the most important hyperparameters are the pre-trained model path `pretrained_model`, `learning_rate` and `batch_size`; part of the default configuration file is shown below. + +```yaml +Global: + pretrained_model: # pre-training model path +Optimizer: + lr: + name: Piecewise + decay_epochs : [700, 800] + values : [0.001, 0.0001] # learning_rate + warmup_epoch: 5 + regularizer: + name: 'L2' + factor: 0 + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ + label_file_list: + - ./train_data/train_list.txt + ratio_list: [1.0] # Sampling ratio, the default value is [1.0] + loader: + shuffle: True + drop_last: False + batch_size_per_card: 128 # single gpu batch size + num_workers: 8 + +``` + + +In the above configuration file, you first need to specify the `pretrained_model` field as the `ch_PP-OCRv3_rec_train/best_accuracy.pdparams` file path decompressed in Chapter 3.2. + +The configuration file provided by PaddleOCR is for 8-gpu training (equivalent to a total batch size of `8*128=1024`) and no pre-trained model is loaded. Therefore, in your scenario, the learning rate and the total batch size need to be adjusted linearly, for example: + +* If your scenario is single-gpu training, single gpu batch_size=128, then the total batch_size=128, in the case of loading the pre-trained model, it is recommended to adjust the learning rate to about `[1e-4, 2e-5]` (For the piecewise learning rate strategy, two values need to be set, the same below). +* If your scenario is for single-gpu training, due to memory limitations, you can only set batch_size=64 for a single gpu, and the total batch_size=64. When loading the pre-trained model, it is recommended to adjust the learning rate to about `[5e-5 , 1e-5]`.
+ + +If there is general real scene data added, it is recommended that in each epoch, the amount of vertical scene data and real scene data should be kept at about 1:1. + +For example: your own vertical scene recognition data volume is 1W, the data label file is `vertical.txt`, the collected general scene recognition data volume is 10W, and the data label file is `general.txt`. + +Then, the `label_file_list` and `ratio_list` parameters can be set as shown below. In each epoch, `vertical.txt` will be fully sampled (sampling ratio is 1.0), including 1W pieces of data; `general.txt` will be sampled according to a sampling ratio of 0.1, including `10W*0.1=1W` pieces of data, the final ratio of the two is `1:1`. + +```yaml +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ + label_file_list: + - vertical.txt + - general.txt + ratio_list: [1.0, 0.1] +``` + +### 3.4 training optimization + +The training process does not happen overnight. After completing a stage of training evaluation, it is recommended to collect and analyze the badcase of the current model in the real scene, adjust the proportion of training data in a targeted manner, or further add synthetic data. Through multiple iterations of training, the model effect is continuously optimized. diff --git a/doc/doc_en/recognition_en.md b/doc/doc_en/recognition_en.md index 7d31b0ff..78917aea 100644 --- a/doc/doc_en/recognition_en.md +++ b/doc/doc_en/recognition_en.md @@ -15,6 +15,7 @@ * [2.6 Training with knowledge distillation](#kd) * [2.7 Multi-language Training](#Multi_language) * [2.8 Training on other platform(Windows/macOS/Linux DCU)](#28) + * [2.9 Fine-tuning](#29) - [3. 
Evaluation and Test](#3-evaluation-and-test) * [3.1 Evaluation](#31-evaluation) * [3.2 Test](#32-test) @@ -384,6 +385,11 @@ GPU mode is not supported, you need to set `use_gpu` to False in the configurati - Linux DCU Running on a DCU device requires setting the environment variable `export HIP_VISIBLE_DEVICES=0,1,2,3`, and the rest of the training and evaluation prediction commands are exactly the same as the Linux GPU. + +## 2.9 Fine-tuning + +In actual use, it is recommended to load the official pre-trained model and fine-tune it in your own data set. For the fine-tuning method of the recognition model, please refer to: [Model Fine-tuning Tutorial](./finetune_en.md). + ## 3. Evaluation and Test -- Gitee From 6eb5d3effd422c55c6b4bfd6ec72864f1c6792dc Mon Sep 17 00:00:00 2001 From: andyj <87074272+andyjpaddle@users.noreply.github.com> Date: Mon, 5 Dec 2022 19:52:11 +0800 Subject: [PATCH 028/112] [doc] fix dead link (#8548) * add en doc * fix dead link & test=document_fix * fix dead link & test=document_fix --- doc/doc_ch/whl.md | 2 +- doc/doc_en/whl_en.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/doc_ch/whl.md b/doc/doc_ch/whl.md index 83f06280..ba955c83 100644 --- a/doc/doc_ch/whl.md +++ b/doc/doc_ch/whl.md @@ -294,7 +294,7 @@ paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --use_angle_cls tru ## 3 自定义模型 -当内置模型无法满足需求时,需要使用到自己训练的模型。 首先,参照[inference.md](./inference.md) 第一节转换将检测、分类和识别模型转换为inference模型,然后按照如下方式使用 +当内置模型无法满足需求时,需要使用到自己训练的模型。 首先,参照[模型导出](./detection.md#4-模型导出与预测)将检测、分类和识别模型转换为inference模型,然后按照如下方式使用 ### 3.1 代码使用 diff --git a/doc/doc_en/whl_en.md b/doc/doc_en/whl_en.md index 77e80faa..5628dc3f 100644 --- a/doc/doc_en/whl_en.md +++ b/doc/doc_en/whl_en.md @@ -261,7 +261,7 @@ Output will be a list, each item contains classification result and confidence ## 3 Use custom model When the built-in model cannot meet the needs, you need to use your own trained model. 
-First, refer to the first section of [inference_en.md](./inference_en.md) to convert your det and rec model to inference model, and then use it as follows +First, refer to [export](./detection_en.md#4-inference) doc to convert your det and rec model to inference model, and then use it as follows ### 3.1 Use by code -- Gitee From 34b9569800a38af41a27ed893b12567757ef6c89 Mon Sep 17 00:00:00 2001 From: topduke <784990967@qq.com> Date: Wed, 7 Dec 2022 15:07:50 +0800 Subject: [PATCH 029/112] Update rec_nrtr_head.py (#8564) --- ppocr/modeling/heads/rec_nrtr_head.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/ppocr/modeling/heads/rec_nrtr_head.py b/ppocr/modeling/heads/rec_nrtr_head.py index bf9ef561..2fffa521 100644 --- a/ppocr/modeling/heads/rec_nrtr_head.py +++ b/ppocr/modeling/heads/rec_nrtr_head.py @@ -17,7 +17,6 @@ import paddle from paddle import nn import paddle.nn.functional as F from paddle.nn import LayerList -# from paddle.nn.initializer import XavierNormal as xavier_uniform_ from paddle.nn import Dropout, Linear, LayerNorm import numpy as np from ppocr.modeling.backbones.rec_svtrnet import Mlp, zeros_, ones_ @@ -30,7 +29,6 @@ class Transformer(nn.Layer): Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. In Advances in Neural Information Processing Systems, pages 6000-6010. - Args: d_model: the number of expected features in the encoder/decoder inputs (default=512). nhead: the number of heads in the multiheadattention models (default=8). 
@@ -162,7 +160,7 @@ class Transformer(nn.Layer): memory = src dec_seq = paddle.full((bs, 1), 2, dtype=paddle.int64) dec_prob = paddle.full((bs, 1), 1., dtype=paddle.float32) - for len_dec_seq in range(1, self.max_len): + for len_dec_seq in range(1, paddle.to_tensor(self.max_len)): dec_seq_embed = self.embedding(dec_seq) dec_seq_embed = self.positional_encoding(dec_seq_embed) tgt_mask = self.generate_square_subsequent_mask( @@ -304,7 +302,7 @@ class Transformer(nn.Layer): inst_idx_to_position_map = get_inst_idx_to_tensor_position_map( active_inst_idx_list) # Decode - for len_dec_seq in range(1, self.max_len): + for len_dec_seq in range(1, paddle.to_tensor(self.max_len)): src_enc_copy = src_enc.clone() active_inst_idx_list = beam_decode_step( inst_dec_beams, len_dec_seq, src_enc_copy, @@ -348,15 +346,12 @@ class MultiheadAttention(nn.Layer): """Allows the model to jointly attend to information from different representation subspaces. See reference: Attention Is All You Need - .. math:: \text{MultiHead}(Q, K, V) = \text{Concat}(head_1,\dots,head_h)W^O \text{where} head_i = \text{Attention}(QW_i^Q, KW_i^K, VW_i^V) - Args: embed_dim: total dimension of the model num_heads: parallel attention layers, or heads - """ def __init__(self, embed_dim, num_heads, dropout=0., self_attn=False): -- Gitee From ae9cd5bba1515fa381b6478999f76ab3ff227774 Mon Sep 17 00:00:00 2001 From: Wang Xin Date: Thu, 8 Dec 2022 13:35:37 +0800 Subject: [PATCH 030/112] Update knowledge_distillation_en.md (#8572) remove unnecessary '```' symbol --- doc/doc_en/knowledge_distillation_en.md | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/doc_en/knowledge_distillation_en.md b/doc/doc_en/knowledge_distillation_en.md index 52725e5c..4e1f4e78 100755 --- a/doc/doc_en/knowledge_distillation_en.md +++ b/doc/doc_en/knowledge_distillation_en.md @@ -228,7 +228,6 @@ Architecture: enc_dim: 512 max_text_length: *max_text_length ``` -``` When the model is finally trained, it contains 3 sub-networks: `Teacher`, 
`Student`, `Student2`. -- Gitee From 42e9130eb74402fec5270f3361cd6d59497c94f2 Mon Sep 17 00:00:00 2001 From: Double_V Date: Thu, 8 Dec 2022 14:00:18 +0800 Subject: [PATCH 031/112] [bug] fix det_res18_db_v2.0.yml (#8579) * fix PP-OCRv3 det train * fix doc * fix bug -- Gitee From fa385979ee13896928f8e6999d0d3f893cc591e9 Mon Sep 17 00:00:00 2001 From: Evezerest <50011306+Evezerest@users.noreply.github.com> Date: Tue, 13 Dec 2022 14:47:45 +0800 Subject: [PATCH 032/112] Update ocr_book.md --- doc/doc_ch/ocr_book.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/doc_ch/ocr_book.md b/doc/doc_ch/ocr_book.md index 03a6011b..b570ae4b 100644 --- a/doc/doc_ch/ocr_book.md +++ b/doc/doc_ch/ocr_book.md @@ -21,5 +21,5 @@ ## 资料地址 - 中文版电子书下载请扫描首页二维码入群后领取 -- [notebook教程](../../notebook/notebook_ch/) +- [notebook教程](https://github.com/PaddleOCR-Community/Dive-into-OCR) - [教学视频](https://aistudio.baidu.com/aistudio/education/group/info/25207) -- Gitee From b28af5d865103dab84a15de549c5ac800f8727a0 Mon Sep 17 00:00:00 2001 From: Evezerest <50011306+Evezerest@users.noreply.github.com> Date: Thu, 15 Dec 2022 16:48:35 +0800 Subject: [PATCH 033/112] Update README_ch.md --- README_ch.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_ch.md b/README_ch.md index 759934a8..51bf19d3 100755 --- a/README_ch.md +++ b/README_ch.md @@ -27,7 +27,7 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力 ## 📣 近期更新 -- 🔨**2022.11 新增实现[4种前沿算法](doc/doc_ch/algorithm_overview.md)**:文本检测 [DRRG](doc/doc_ch/algorithm_det_drrg.md), 文本识别 [RFL](doc/doc_ch/algorithm_rec_rfl.md), 文本超分[Text Telescope](doc/doc_ch/algorithm_sr_telescope.md),公式识别[CAN](doc/doc_ch/algorithm_rec_can.md.md) +- 🔨**2022.11 新增实现[4种前沿算法](doc/doc_ch/algorithm_overview.md)**:文本检测 [DRRG](doc/doc_ch/algorithm_det_drrg.md), 文本识别 [RFL](doc/doc_ch/algorithm_rec_rfl.md), 文本超分[Text Telescope](doc/doc_ch/algorithm_sr_telescope.md),公式识别[CAN](doc/doc_ch/algorithm_rec_can.md) - **2022.10 
优化[JS版PP-OCRv3模型](./deploy/paddlejs/README_ch.md)**:模型大小仅4.3M,预测速度提升8倍,配套web demo开箱即用 - **💥 直播回放:PaddleOCR研发团队详解PP-StructureV2优化策略**。微信扫描[下方二维码](#开源社区),关注公众号并填写问卷后进入官方交流群,获取直播回放链接与20G重磅OCR学习大礼包(内含PDF转Word应用程序、10种垂类模型、《动手学OCR》电子书等) -- Gitee From 23e034c40ecd5755af48d7b14dcc1bf6c5cf1128 Mon Sep 17 00:00:00 2001 From: xiaoting <31891223+tink2123@users.noreply.github.com> Date: Tue, 20 Dec 2022 13:22:03 +0800 Subject: [PATCH 034/112] fix aster loss for axis (#8674) --- ppocr/losses/rec_aster_loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ppocr/losses/rec_aster_loss.py b/ppocr/losses/rec_aster_loss.py index 52605e46..9b0a34ee 100644 --- a/ppocr/losses/rec_aster_loss.py +++ b/ppocr/losses/rec_aster_loss.py @@ -28,7 +28,7 @@ class CosineEmbeddingLoss(nn.Layer): def forward(self, x1, x2, target): similarity = paddle.sum( - x1 * x2, dim=-1) / (paddle.norm( + x1 * x2, axis=-1) / (paddle.norm( x1, axis=-1) * paddle.norm( x2, axis=-1) + self.epsilon) one_list = paddle.full_like(target, fill_value=1) -- Gitee From 7e7630eb67ce77f7f65c6dab5bfbea2bebabb8d5 Mon Sep 17 00:00:00 2001 From: Double_V Date: Thu, 22 Dec 2022 11:10:22 +0800 Subject: [PATCH 035/112] fix issue 9659 and doc (#8689) --- deploy/cpp_infer/docs/windows_vs2019_build.md | 2 +- ppocr/postprocess/db_postprocess.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/deploy/cpp_infer/docs/windows_vs2019_build.md b/deploy/cpp_infer/docs/windows_vs2019_build.md index bcaefa46..2f5c5818 100644 --- a/deploy/cpp_infer/docs/windows_vs2019_build.md +++ b/deploy/cpp_infer/docs/windows_vs2019_build.md @@ -121,7 +121,7 @@ CUDA_LIB、CUDNN_LIB、TENSORRT_DIR、WITH_GPU、WITH_TENSORRT ``` cd /d D:\projects\cpp\PaddleOCR\deploy\cpp_infer ``` -可执行文件`ppocr.exe`即为样例的预测程序,其主要使用方法如下,更多使用方法可以参考[说明文档](../readme.md)`运行demo`部分。 +可执行文件`ppocr.exe`即为样例的预测程序,其主要使用方法如下,更多使用方法可以参考[说明文档](../readme_ch.md)`运行demo`部分。 ```shell # 切换终端编码为utf8 diff --git a/ppocr/postprocess/db_postprocess.py 
b/ppocr/postprocess/db_postprocess.py index dfe10781..244825b7 100755 --- a/ppocr/postprocess/db_postprocess.py +++ b/ppocr/postprocess/db_postprocess.py @@ -144,9 +144,9 @@ class DBPostProcess(object): np.round(box[:, 0] / width * dest_width), 0, dest_width) box[:, 1] = np.clip( np.round(box[:, 1] / height * dest_height), 0, dest_height) - boxes.append(box.astype(np.int16)) + boxes.append(box.astype("int32")) scores.append(score) - return np.array(boxes, dtype=np.int16), scores + return np.array(boxes, dtype="int32"), scores def unclip(self, box, unclip_ratio): poly = Polygon(box) @@ -185,15 +185,15 @@ class DBPostProcess(object): ''' h, w = bitmap.shape[:2] box = _box.copy() - xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int), 0, w - 1) - xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int), 0, w - 1) - ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int), 0, h - 1) - ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int), 0, h - 1) + xmin = np.clip(np.floor(box[:, 0].min()).astype("int32"), 0, w - 1) + xmax = np.clip(np.ceil(box[:, 0].max()).astype("int32"), 0, w - 1) + ymin = np.clip(np.floor(box[:, 1].min()).astype("int32"), 0, h - 1) + ymax = np.clip(np.ceil(box[:, 1].max()).astype("int32"), 0, h - 1) mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8) box[:, 0] = box[:, 0] - xmin box[:, 1] = box[:, 1] - ymin - cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1) + cv2.fillPoly(mask, box.reshape(1, -1, 2).astype("int32"), 1) return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0] def box_score_slow(self, bitmap, contour): @@ -214,7 +214,7 @@ class DBPostProcess(object): contour[:, 0] = contour[:, 0] - xmin contour[:, 1] = contour[:, 1] - ymin - cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype(np.int32), 1) + cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype("int32"), 1) return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0] def __call__(self, outs_dict, shape_list): -- Gitee From 
9a6ebb86825f9b9ce21763a89c2ed43b0824cd75 Mon Sep 17 00:00:00 2001 From: Evezerest <50011306+Evezerest@users.noreply.github.com> Date: Thu, 22 Dec 2022 20:35:58 +0800 Subject: [PATCH 036/112] add applications (#8700) * Add 20 applications Add 20 applications * Update README Update README --- README_ch.md | 1 + applications/README.md | 57 ++-- ...07\345\255\227\350\257\206\345\210\253.md" | 268 ++++++++++++++++++ 3 files changed, 299 insertions(+), 27 deletions(-) create mode 100644 "applications/\350\222\231\345\217\244\346\226\207\344\271\246\347\261\215\346\226\207\345\255\227\350\257\206\345\210\253.md" diff --git a/README_ch.md b/README_ch.md index 51bf19d3..a54634df 100755 --- a/README_ch.md +++ b/README_ch.md @@ -27,6 +27,7 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力 ## 📣 近期更新 +- 📚**2022.12 发布[《OCR产业范例20讲》电子书](./applications/README.md)**,新增蒙古文、身份证、液晶屏缺陷等**7个场景应用范例** - 🔨**2022.11 新增实现[4种前沿算法](doc/doc_ch/algorithm_overview.md)**:文本检测 [DRRG](doc/doc_ch/algorithm_det_drrg.md), 文本识别 [RFL](doc/doc_ch/algorithm_rec_rfl.md), 文本超分[Text Telescope](doc/doc_ch/algorithm_sr_telescope.md),公式识别[CAN](doc/doc_ch/algorithm_rec_can.md) - **2022.10 优化[JS版PP-OCRv3模型](./deploy/paddlejs/README_ch.md)**:模型大小仅4.3M,预测速度提升8倍,配套web demo开箱即用 - **💥 直播回放:PaddleOCR研发团队详解PP-StructureV2优化策略**。微信扫描[下方二维码](#开源社区),关注公众号并填写问卷后进入官方交流群,获取直播回放链接与20G重磅OCR学习大礼包(内含PDF转Word应用程序、10种垂类模型、《动手学OCR》电子书等) diff --git a/applications/README.md b/applications/README.md index 950adf76..35c8e52f 100644 --- a/applications/README.md +++ b/applications/README.md @@ -1,29 +1,32 @@ [English](README_en.md) | 简体中文 -# 场景应用 +# OCR产业范例20讲 PaddleOCR场景应用覆盖通用,制造、金融、交通行业的主要OCR垂类应用,在PP-OCR、PP-Structure的通用能力基础之上,以notebook的形式展示利用场景数据微调、模型优化方法、数据增广等内容,为开发者快速落地OCR应用提供示范与启发。 -- [教程文档](#1) - - [通用](#11) - - [制造](#12) - - [金融](#13) - - [交通](#14) 
+基于在各类垂类场景中落地的经验,PaddleOCR联合**北京师范大学副教授柯永红、云南省能源投资集团财务有限公司智能化项目经理钟榆星、信雅达科技股份有限公司高级研发工程师张少华、郑州三晖电气股份有限公司工程师郭媛媛、福建中烟工业有限责任公司工程师顾茜、内蒙古阿尔泰电子信息技术有限公司CTO欧日乐克、安科私(北京)科技有限公司创始人柯双喜等产学研同仁共同开源《OCR产业范例20讲》电子书**,通过Notebook的形式系统展示OCR在产业界应用的具体场景的调优过程与落地经验。该书包含以下特点: -- [模型下载](#2) +- 20例OCR在工业、金融、教育、交通等行业的关键场景应用范例; +- 覆盖从问题抽象、数据处理、训练调优、部署应用的全流程AI落地环节,为开发者提供常见的OCR优化思路; +- 每个范例配有交互式Notebook教程,通过代码展示获得实际结果,便于学习修改与二次开发; +- GitHub和AI Studio上开源本书中涉及的范例内容和代码,方便开发者学习和使用。 ## 教程文档 +《OCR产业范例20讲》中包含如下教程。如需获取整合后的电子版,请参考[资料下载](#2) + ### 通用 -| 类别 | 亮点 | 模型下载 | 教程 | 示例图 | -| ---------------------- | ------------------------------------------------------------ | -------------- | --------------------------------------- | ------------------------------------------------------------ | -| 高精度中文识别模型SVTR | 比PP-OCRv3识别模型精度高3%,
可用于数据挖掘或对预测效率要求不高的场景。 | [模型下载](#2) | [中文](./高精度中文识别模型.md)/English | | -| 手写体识别 | 新增字形支持 | [模型下载](#2) | [中文](./手写文字识别.md)/English | | +| 类别 | 亮点 | 模型下载 | 教程 | 示例图 | +| ---------------------- | ------------------------------------------------------------ | -------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | +| 高精度中文识别模型SVTR | 比PP-OCRv3识别模型精度高3%,
可用于数据挖掘或对预测效率要求不高的场景。 | [模型下载](#2) | [中文](./高精度中文识别模型.md)/English | | +| 手写体识别 | 新增字形支持 | [模型下载](#2) | [中文](./手写文字识别.md)/English | | +| 蒙文识别 | 新语种识别支持 | 即将开源 | [中文](./蒙古文书籍文字识别.md)/English | | +| 甲骨文识别 | 新语种识别支持 | [模型下载](#2) | [中文](https://aistudio.baidu.com/aistudio/projectdetail/5216041?contributionType=1)/English | | @@ -35,21 +38,24 @@ PaddleOCR场景应用覆盖通用,制造、金融、交通行业的主要OCR | 液晶屏读数识别 | 检测模型蒸馏、Serving部署 | [模型下载](#2) | [中文](./液晶屏读数识别.md)/English | | | 包装生产日期 | 点阵字符合成、过曝过暗文字识别 | [模型下载](#2) | [中文](./包装生产日期识别.md)/English | | | PCB文字识别 | 小尺寸文本检测与识别 | [模型下载](#2) | [中文](./PCB字符识别/PCB字符识别.md)/English | | -| 电表识别 | 大分辨率图像检测调优 | [模型下载](#2) | | | -| 液晶屏缺陷检测 | 非文字字符识别 | | | | +| 电表识别 | 大分辨率图像检测调优 | [模型下载](#2) | [中文](https://aistudio.baidu.com/aistudio/projectdetail/5297312?forkThirdPart=1)/English | | +| 液晶屏缺陷检测 | 非文字字符识别 | [模型下载](#2) | [中文](https://aistudio.baidu.com/aistudio/projectdetail/4268015)/English | | ### 金融 -| 类别 | 亮点 | 模型下载 | 教程 | 示例图 | -| -------------- | ----------------------------- | -------------- | ----------------------------------------- | ------------------------------------------------------------ | -| 表单VQA | 多模态通用表单结构化提取 | [模型下载](#2) | [中文](./多模态表单识别.md)/English | | -| 增值税发票 | 关键信息抽取,SER、RE任务训练 | [模型下载](#2) | [中文](./发票关键信息抽取.md)/English | | -| 印章检测与识别 | 端到端弯曲文本识别 | [模型下载](#2) | [中文](./印章弯曲文字识别.md)/English | | -| 通用卡证识别 | 通用结构化提取 | [模型下载](#2) | [中文](./快速构建卡证类OCR.md)/English | | -| 身份证识别 | 结构化提取、图像阴影 | | | | -| 合同比对 | 密集文本检测、NLP关键信息抽取 | [模型下载](#2) | [中文](./扫描合同关键信息提取.md)/English | | +| 类别 | 亮点 | 模型下载 | 教程 | 示例图 | +| ------------------ | --------------------------------- | -------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | +| 表单VQA | 多模态通用表单结构化提取 | [模型下载](#2) | [中文](./多模态表单识别.md)/English | | +| 增值税发票 | 关键信息抽取,SER、RE任务训练 | [模型下载](#2) | [中文](./发票关键信息抽取.md)/English | | +| 印章检测与识别 | 端到端弯曲文本识别 | [模型下载](#2) | [中文](./印章弯曲文字识别.md)/English | | +| 通用卡证识别 | 通用结构化提取 | [模型下载](#2) 
| [中文](./快速构建卡证类OCR.md)/English | | +| 银行电子回单 | 回单关键信息抽取 | --- | [中文](https://aistudio.baidu.com/aistudio/projectdetail/5267489?contributionType=1)/English | | +| 身份证识别 | 结构化提取、图像阴影 | [模型下载](#2) | [中文](https://aistudio.baidu.com/aistudio/projectdetail/4255861?contributionType=1)/English | | +| 合同比对 | 文本检测参数调整、NLP关键信息抽取 | --- | [中文](./扫描合同关键信息提取.md)/English | | +| 研报识别与实体统计 | 密集文本检测、NLP实体识别 | [模型下载](#2) | [中文](https://aistudio.baidu.com/aistudio/projectdetail/2574084)/English | | +| 通用表格识别 | 表格数据生成 | --- | [中文](https://aistudio.baidu.com/aistudio/projectdetail/5099668?contributionType=1)/English | | @@ -63,16 +69,13 @@ PaddleOCR场景应用覆盖通用,制造、金融、交通行业的主要OCR -## 模型下载 +## 资料下载 -如需下载上述场景中已经训练好的垂类模型,可以扫描下方二维码,关注公众号填写问卷后,加入PaddleOCR官方交流群获取20G OCR学习大礼包(内含《动手学OCR》电子书、课程回放视频、前沿论文等重磅资料) +如需下载**《OCR产业范例20讲》和上述场景中已经训练好的垂类模型**,可以扫描下方二维码,关注公众号填写问卷后,加入PaddleOCR官方交流群获取20G OCR学习大礼包(内含《动手学OCR》电子书、课程回放视频、前沿论文等重磅资料)
- 如果您是企业开发者且未在上述场景中找到合适的方案,可以填写[OCR应用合作调研问卷](https://paddle.wjx.cn/vj/QwF7GKw.aspx),免费与官方团队展开不同层次的合作,包括但不限于问题抽象、确定技术方案、项目答疑、共同研发等。如果您已经使用PaddleOCR落地项目,也可以填写此问卷,与飞桨平台共同宣传推广,提升企业技术品宣。期待您的提交! - -traffic - +traffic diff --git "a/applications/\350\222\231\345\217\244\346\226\207\344\271\246\347\261\215\346\226\207\345\255\227\350\257\206\345\210\253.md" "b/applications/\350\222\231\345\217\244\346\226\207\344\271\246\347\261\215\346\226\207\345\255\227\350\257\206\345\210\253.md" new file mode 100644 index 00000000..7470c947 --- /dev/null +++ "b/applications/\350\222\231\345\217\244\346\226\207\344\271\246\347\261\215\346\226\207\345\255\227\350\257\206\345\210\253.md" @@ -0,0 +1,268 @@ +# 蒙古文书籍文字识别 + +本范例将使用OCR技术实现蒙古文书籍文字识别检测和识别,通过本章您可以掌握: + +- 蒙古文的基本知识 +- 如何制作蒙古文字典与合成数据 +- 如何进行识别模型微调 + +本范例由内蒙古阿尔泰电子信息技术有限公司CTO欧日乐克、苏日图、达布希腊图、歆琪乐与飞桨联合打造 + +## 背景介绍 + +蒙古文文字识别技术在蒙古文信息处理领域成为一个亟待解决的问题。但由于诸多主客观原因,产品化蒙古文文字识别技术上有一段距离。而书籍的数字化是其中一项重要的课题,但因为涉及相关技术较多,难度较高,在整体上解决蒙古文书籍文字识别技术还不成熟。 + +![pic_script](https://user-images.githubusercontent.com/50011306/206182800-a3029f08-dece-4cbe-9298-a66eb413137a.jpeg) + +*基本概念 +```txt +字:即字符,不同于汉字的字(Character)一字一形,并包含字意,蒙古文的字类似于英语的 +字母(Letter)是一种表音文字,但却拥有若干字形(Glyph),若干个字构成一个词。 + +名义字符:Unicode十大法则中规定,"Character is not Glyph"(字符不是字形)。 +于是像蒙古文一样有形态变化的文字就有了一个字形代表其他形态的字形。该字形被记录在 +Unicode基本表中并分配编码。如蒙古文 ᠠ([a]) 为 a 的独立形式,分配码位\u1820, +记作uni1820.isol,代表所有词首uni1820.init、词中uni1820.medi和词尾1820.fina形式。 + +变形显现字符:除名义字符外的其他没有实际编码位置的字形。在蒙古文文本中看到的字符皆为通过 +Unicode Scripts Processor(简称USP)处理后显示出来的字符。 + +词:语言里最小的可以独立运用的单位,等同于英语的word,以空格分割的字符串。 +``` + +### 项目难点 + +- 1.蒙古文字符的复杂性 + + 一形多字 + + ![pic2](https://user-images.githubusercontent.com/50011306/206182327-b4a888a6-e67e-4d87-992d-0ddb830de85f.png) + + 多字一形 + + ![pic3](https://user-images.githubusercontent.com/50011306/206182335-25b8c657-98da-4532-ae7d-608608a8f047.png) + +- 2.蒙古文排版方向(竖写、换行左->右) + +书写方向与换行方向会影响识别后的排序以及文字方向。 + + 
![pic4](https://user-images.githubusercontent.com/50011306/206182347-c5e2525a-f1fd-4ee6-936c-946435b3fe6b.png) + +- 3.蒙古文字宽、行宽的不一致性 + + + ![pic5](https://user-images.githubusercontent.com/50011306/206182391-431c2441-1d1d-4f25-931c-b0f663bf3285.png) + + +- 4.字符中的部件难以区分(MVS、NNBSP点的处理,以及合体字形) + +蒙古文中有一些有关形态变化的控制字符,其中最频繁出现的有 Mongolian Vowel Separator +(MVS,\u180E),和 Narrow No-Break Space(NNBSP,\u202F)。该两个控制符正常 +情况下在文本中是透明的,比空格窄,不能换行。MVS用于连接词干与词尾a、e元音,NNBSP则 +用于连接词与词附加成分(可以理解成分写后缀)。MVS会引发双向形变,而NNBSP则会引发后位 +形变。 + +此外,蒙古文中有一些字形为另一个字形的组成部件,导致识别时不好对应结果。 + + +针对以上问题, 本例选用PP-OCRv3这一开源超轻量OCR系统进行蒙古文文本识别系统的开发。我们首先使用数据合成工具合成了250万数据,基于这部分数据进行训练,通过精准切除白色边缘,随机加入标点符号,调整评估集数据使识别精度提升至75.78%。效果如下: + +| 策略 | 精度 %| +| :--------------- | :-------- | +| 合成数据训练 | 42.38| +| 优化合成训练 | 75.78| + +具体流程为: +- 第一步,选取真实语料并校对,并把语料副本转换为非Unicode编码版本 +- 第二步,选择多种字体生成,按行生成,生成时建议使用非Unicode字体生成 +- 第三步,从真实的扫描文本图片中按行切割保存,并保证对每个图进行Unicode编码的标注 +评估集数据均采用真实图片样本。 +- 第四步,开始训练 +- 第五步,识别文字 + +评估集数据的质量对模型的训练效率也起到很大的作用。 + +## 快速体验 +### 环境搭建 + +本任务基于Aistudio完成, 具体环境如下: + +- 操作系统: Linux +- PaddlePaddle: 2.3 +- PaddleOCR: Release/2.5 +- text_renderer: master + +下载PaddleOCR代码并安装依赖库: +```bash +git clone -b dygraph https://gitee.com/paddlepaddle/PaddleOCR + +# 安装依赖库 +cd PaddleOCR +pip install -r requirements.txt +``` + +### 模型推理 +将下载或训练完成的模型放置在`PaddleOCR/output`下, 然后使用如下命令即可快速进行模型推理 +```bash +python tools/infer_rec.py -c configs/rec/PP-OCRv3/multi_language/Mongolian_PP-OCRv3_rec.yml \ + -o Global.pretrained_model=output/v3_Mongolian_mobile/best_accuracy \ + Global.infer_img=doc/imgs_words/ +``` + + +## 数据准备 + +本项目从真实语料中生成250万张图片作为训练集。另外生成1万张图片作为验证集。 + +### 语料准备 +蒙古文由于编码原因生成图片时不能直接用 Unicode 字符串生成。蒙古文 Unicode 的本质是音码,伴随复杂的形态变化,如果没有对应的复杂文本处理程序则只能显示蒙古文的名义字符,而非变形显现字符。 +因此如果想生成蒙古文图片则需要: 1.调用Windows系统的 USP10.dll,2.用支持形码的字体生成。 +本项目使用了第二种方案,即使用形码的字体生成图片,并对应 Unicode 标签。 + +直接使用 Unicode 生成的情况(字符会分开并以名义字符显示): + +![pic8](https://user-images.githubusercontent.com/50011306/206183135-d8be1ff7-4e3b-404f-bf5c-c0b47d5d4718.png) +
+$$\mbox{左列为Unicode生成图片,右列为Unicode文本}$$ + +![pic9](https://user-images.githubusercontent.com/50011306/206183154-3aec2415-66fb-41b8-872d-49aad4b62113.png) + +$$\mbox{左列为Unicode文本,右列为形码生成图片}$$ + +生成图片时建议将字符串长度保持在5个词(平均30个字符),否则训练较为困难。 + +### 图片处理 + +部分训练图片示例如下: + +![pic6](https://user-images.githubusercontent.com/50011306/206182740-d7e38be8-e857-45a4-8639-2a8656c9f8e5.png) + +为验证模型对实际图片的效果,验证图片采用了真实扫描图片。在扫描完整的页面后对 + +标签文件格式如下: + + + + +|数据集类型|数量| +|---|---| +|训练集| 250万| +|验证集| 1.1万| + + + +数据文件结构如下: + +```txt +PaddleOCRv3 +├── train_data # 训练数据文件夹 +│ ├── texts +│ │ ├── train1.txt # 生成的训练数据标签,与图片文档一一对应 +│ │ ├── train2.txt +│ │ ├── train3.txt +│ │ ├── train4.txt +│ │ ├── train11.txt +│ │ ├── train20.txt +│ │ ├── train21.txt +│ │ └── train22.txt +│ ├── image1 # 生成的训练图片 +│ ├── image2 +│ ├── image3 +│ ├── image4 +│ ├── image11 +│ ├── image20 +│ ├── image21 +│ └── image22 +├── test_data # 验证数据文件夹 +│ ├── test_data.txt # 验证数据标签 +│ ├── 0 # 每个文件夹有34张图片 +│ ├── 1 +: : +: : +│ └── 409 +``` +### 制作字典 + +根据 Unicode 编码顺序制作一个包含所有蒙古文字符的文本字典,建议保存到./ppocr/utils/dict目录下面,并在yml文件中更改地址。 + + + + +## 基于合成数据训练 +### 模型训练和评估 + +准备好合成数据后,我们可以使用以下命令训练数据: + +```bash +cd ${PaddleOCR_root} +python tools/train.py -c configs/rec/PP-OCRv3/multi_language/Mongolian_PP-OCRv3_rec.yml +``` +如果想从断点继续训练: +```bash +cd ${PaddleOCR_root} +python tools/train.py -c configs/rec/PP-OCRv3/multi_language/Mongolian_PP-OCRv3_rec.yml \ + -o Global.checkpoints=./output/v3_Mongolian_mobile/best_accuracy +``` +可填各参数含义如下: + +```txt +-c: 指定使用的配置文件,Mongolian_PP-OCRv3_rec.yml对应于OCRv3识别模型。 +-o: 覆盖配置文件中参数 +Global.pretrained_model: 指定使用的预训练模型 +Global.checkpoints: 断点位置 +Global.epoch_num: 指定训练的epoch数 +Global.eval_batch_step: 间隔多少step做一次评估 +Train.dataset.data_dir: 训练数据集路径 +Train.dataset.label_file_list: 训练集文件列表 +Train.loader.batch_size_per_card: 训练单卡batch size +Eval.dataset.data_dir: 评估数据集路径 +Eval.dataset.label_file_list: 评估数据集文件列表 +Eval.loader.batch_size_per_card: 评估单卡batch size +``` + +### 模型推测 +训练好的模型推测如下: +```bash +python 
tools/infer_rec.py -c configs/rec/PP-OCRv3/multi_language/Mongolian_PP-OCRv3_rec.yml \ + -o Global.pretrained_model=output/v3_Mongolian_mobile/best_accuracy \ + Global.infer_img=doc/imgs_words/ +``` +## 用真实数据测试模型 + +训练完成后可以测试模型。可将测试图片指定到某文件夹: +```shell +PaddleOCRv3 +├── doc +├── imgs_words +│ ├── arabic +│ ├── belarusian +│ ├── bulgarian +: : +: : +│ ├── mongolian # 在此放入真实蒙古文图片,一个图片一行 +│ └── uyghur +``` +快速推测 + +```bash +python tools/eval.py -c configs/rec/PP-OCRv3/multi_language/Mongolian_PP-OCRv3_rec.yml \ + -o Global.checkpoints=./output/v3_Mongolian_mobile/best_accuracy +``` +推测结果将被记录在predicts_ppocrv3_Mongolian.txt文件中。 + +```shell +PaddleOCRv3 +├── output +│ ├── rec +│ │ └── predicts_ppocrv3_Mongolian.txt +│ └── v3_Mongolian_mobile +``` + +部分结果:三列分别为推测结果、真实标签、图片 + +![pic7](https://user-images.githubusercontent.com/50011306/206182924-57472dc7-fd74-4872-8466-15c05eeb369d.png) + + +## 总结 + +本例选用PP-OCRv3这一开源超轻量OCR系统进行蒙古文文本识别系统的开发。加入250万合成数据,在现有模型基础上进行微调,通过修正训练集,设定评估标准,最终将蒙古文识别精度从42%提升至75%。 -- Gitee From 1c40b84e4d4e45fe389ae094bb0c752384f17d0f Mon Sep 17 00:00:00 2001 From: jingsongliu <45508593+jingsongliujing@users.noreply.github.com> Date: Mon, 26 Dec 2022 10:50:36 +0800 Subject: [PATCH 037/112] Update quickstart.md (#8622) --- doc/doc_ch/quickstart.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/doc_ch/quickstart.md b/doc/doc_ch/quickstart.md index cac7664c..cdae287f 100644 --- a/doc/doc_ch/quickstart.md +++ b/doc/doc_ch/quickstart.md @@ -176,13 +176,14 @@ for idx in range(len(result)): print(line) # 显示结果 +# 如果本地没有simfang.ttf,可以在doc/fonts目录下下载 from PIL import Image result = result[0] image = Image.open(img_path).convert('RGB') boxes = [line[0] for line in result] txts = [line[1][0] for line in result] scores = [line[1][1] for line in result] -im_show = draw_ocr(image, boxes, txts, scores, font_path='./fonts/simfang.ttf') +im_show = draw_ocr(image, boxes, txts, scores, font_path='doc/fonts/simfang.ttf') im_show = 
Image.fromarray(im_show) im_show.save('result.jpg') ``` -- Gitee From 3560ff3a78bd114a529b202b0e98e92a57804473 Mon Sep 17 00:00:00 2001 From: Tuan Nguyen Date: Mon, 26 Dec 2022 11:53:17 +0900 Subject: [PATCH 038/112] Fix config link in algorithm_det_db_en.md (#8544) --- doc/doc_en/algorithm_det_db_en.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/doc_en/algorithm_det_db_en.md b/doc/doc_en/algorithm_det_db_en.md index fde344c3..7ab4a3a2 100644 --- a/doc/doc_en/algorithm_det_db_en.md +++ b/doc/doc_en/algorithm_det_db_en.md @@ -31,7 +31,7 @@ On the ICDAR2015 dataset, the text detection result is as follows: | --- | --- | --- | --- | --- | --- | --- | |DB|ResNet50_vd|[configs/det/det_r50_vd_db.yml](../../configs/det/det_r50_vd_db.yml)|86.41%|78.72%|82.38%|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)| |DB|MobileNetV3|[configs/det/det_mv3_db.yml](../../configs/det/det_mv3_db.yml)|77.29%|73.08%|75.12%|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar)| -|DB++|ResNet50|[configs/det/det_r50_db++_ic15.yml](../../configs/det/det_r50_db++_ic15.yml)|90.89%|82.66%|86.58%|[pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/en_det/ResNet50_dcn_asf_synthtext_pretrained.pdparams)/[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/en_det/det_r50_db%2B%2B_icdar15_train.tar)| +|DB++|ResNet50|[configs/det/det_r50_db++_icdar15.yml](../../configs/det/det_r50_db++_icdar15.yml)|90.89%|82.66%|86.58%|[pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/en_det/ResNet50_dcn_asf_synthtext_pretrained.pdparams)/[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/en_det/det_r50_db%2B%2B_icdar15_train.tar)| On the TD_TR dataset, the text detection result is as follows: -- Gitee From 90004fe6aff12cf5e02ddfc09e27d2935511c499 Mon Sep 17 00:00:00 2001 From: Evezerest <50011306+Evezerest@users.noreply.github.com> Date: Wed, 28 Dec 2022 16:43:40 +0800 
Subject: [PATCH 039/112] Update ocr_book.md --- doc/doc_ch/ocr_book.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/doc_ch/ocr_book.md b/doc/doc_ch/ocr_book.md index b570ae4b..420bc886 100644 --- a/doc/doc_ch/ocr_book.md +++ b/doc/doc_ch/ocr_book.md @@ -1,6 +1,6 @@ # 《动手学OCR》电子书 -《动手学OCR》是PaddleOCR团队携手复旦大学青年研究员陈智能、中国移动研究院视觉领域资深专家黄文辉等产学研同仁,以及OCR开发者共同打造的结合OCR前沿理论与代码实践的教材。主要特色如下: +《动手学OCR》是PaddleOCR团队携手华中科技大学博导/教授,IAPR Fellow 白翔、复旦大学青年研究员陈智能、中国移动研究院视觉领域资深专家黄文辉、中国工商银行大数据人工智能实验室研究员等产学研同仁,以及OCR开发者共同打造的结合OCR前沿理论与代码实践的教材。主要特色如下: - 覆盖从文本检测识别到文档分析的OCR全栈技术 - 紧密结合理论实践,跨越代码实现鸿沟,并配套教学视频 -- Gitee From b4c25a36631e5c29eb349d185835ac7cdbb72952 Mon Sep 17 00:00:00 2001 From: Evezerest <50011306+Evezerest@users.noreply.github.com> Date: Wed, 28 Dec 2022 17:13:01 +0800 Subject: [PATCH 040/112] Update README.md --- applications/README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/applications/README.md b/applications/README.md index 35c8e52f..ab51cec2 100644 --- a/applications/README.md +++ b/applications/README.md @@ -2,9 +2,7 @@ # OCR产业范例20讲 -PaddleOCR场景应用覆盖通用,制造、金融、交通行业的主要OCR垂类应用,在PP-OCR、PP-Structure的通用能力基础之上,以notebook的形式展示利用场景数据微调、模型优化方法、数据增广等内容,为开发者快速落地OCR应用提供示范与启发。 - -基于在各类垂类场景中落地的经验,PaddleOCR联合**北京师范大学副教授柯永红、云南省能源投资集团财务有限公司智能化项目经理钟榆星、信雅达科技股份有限公司高级研发工程师张少华、郑州三晖电气股份有限公司工程师郭媛媛、福建中烟工业有限责任公司工程师顾茜、内蒙古阿尔泰电子信息技术有限公司CTO欧日乐克、安科私(北京)科技有限公司创始人柯双喜等产学研同仁共同开源《OCR产业范例20讲》电子书**,通过Notebook的形式系统展示OCR在产业界应用的具体场景的调优过程与落地经验。该书包含以下特点: +PaddleOCR场景应用覆盖通用,制造、金融、交通等行业的主要OCR垂类应用,基于PP-OCR、PP-Structure的通用能力和各类垂类场景中落地的经验,PaddleOCR联合**北京师范大学副教授柯永红、云南省能源投资集团财务有限公司智能化项目经理钟榆星、信雅达科技股份有限公司高级研发工程师张少华、郑州三晖电气股份有限公司工程师郭媛媛、福建中烟工业有限责任公司工程师顾茜、内蒙古阿尔泰电子信息技术有限公司CTO欧日乐克、安科私(北京)科技有限公司创始人柯双喜等产学研同仁共同开源《OCR产业范例20讲》电子书**,通过Notebook的形式系统展示OCR在产业界应用的具体场景的调优过程与落地经验,为开发者快速落地OCR应用提供示范与启发。该书包含以下特点: - 20例OCR在工业、金融、教育、交通等行业的关键场景应用范例; - 覆盖从问题抽象、数据处理、训练调优、部署应用的全流程AI落地环节,为开发者提供常见的OCR优化思路; -- Gitee From 36bf83239d1dae39baf4b330b2b283373610f48b Mon Sep 17 00:00:00 2001 From: Evezerest 
<50011306+Evezerest@users.noreply.github.com> Date: Wed, 28 Dec 2022 17:15:15 +0800 Subject: [PATCH 041/112] Update README.md --- applications/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/applications/README.md b/applications/README.md index ab51cec2..e1529ed6 100644 --- a/applications/README.md +++ b/applications/README.md @@ -69,11 +69,12 @@ PaddleOCR场景应用覆盖通用,制造、金融、交通等行业的主要OC ## 资料下载 -如需下载**《OCR产业范例20讲》和上述场景中已经训练好的垂类模型**,可以扫描下方二维码,关注公众号填写问卷后,加入PaddleOCR官方交流群获取20G OCR学习大礼包(内含《动手学OCR》电子书、课程回放视频、前沿论文等重磅资料) +如需下载《OCR产业范例20讲》和上述场景中已经训练好的垂类模型,可以扫描下方二维码,关注公众号填写问卷后,加入PaddleOCR官方交流群获取20G OCR学习大礼包(内含《动手学OCR》电子书、课程回放视频、前沿论文等重磅资料)
+ 如果您是企业开发者且未在上述场景中找到合适的方案,可以填写[OCR应用合作调研问卷](https://paddle.wjx.cn/vj/QwF7GKw.aspx),免费与官方团队展开不同层次的合作,包括但不限于问题抽象、确定技术方案、项目答疑、共同研发等。如果您已经使用PaddleOCR落地项目,也可以填写此问卷,与飞桨平台共同宣传推广,提升企业技术品宣。期待您的提交! traffic -- Gitee From f9d0948b0c8ea8abcf9dc1b29495569bd73ae8bb Mon Sep 17 00:00:00 2001 From: zhoujun Date: Wed, 4 Jan 2023 16:25:06 +0800 Subject: [PATCH 042/112] fixed opencv version (#8772) --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index b6dd6e57..4aabba9b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,8 +7,8 @@ tqdm numpy visualdl rapidfuzz -opencv-python -opencv-contrib-python +opencv-python==4.6.0.66 +opencv-contrib-python==4.6.0.66 cython lxml premailer -- Gitee From 0e1148312be91f7fd7ffcb3f57c4c63fcdd5b67f Mon Sep 17 00:00:00 2001 From: Double_V Date: Fri, 6 Jan 2023 14:30:23 +0800 Subject: [PATCH 043/112] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dnp.int=20(#8793)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix issue 9659 and doc * Update README.md --- deploy/slim/quantization/README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/deploy/slim/quantization/README.md b/deploy/slim/quantization/README.md index 7f1ff7ae..cfec456f 100644 --- a/deploy/slim/quantization/README.md +++ b/deploy/slim/quantization/README.md @@ -54,4 +54,6 @@ python deploy/slim/quantization/export_model.py -c configs/det/ch_PP-OCRv3/ch_PP ### 5. 
量化模型部署 上述步骤导出的量化模型,参数精度仍然是FP32,但是参数的数值范围是int8,导出的模型可以通过PaddleLite的opt模型转换工具完成模型转换。 -量化模型部署的可参考 [移动端模型部署](../../lite/readme.md) +量化模型移动端部署的可参考 [移动端模型部署](../../lite/readme.md) + +备注:量化训练后的模型参数是float32类型,转inference model预测时相对不量化无加速效果,原因是量化后模型结构之间存在量化和反量化算子,如果要使用量化模型部署,建议使用TensorRT并设置precision为INT8加速量化模型的预测时间。 -- Gitee From 418d80c948c05a13b2f2f5a023e1b36aae7bb591 Mon Sep 17 00:00:00 2001 From: Ikko Eltociear Ashimine Date: Tue, 10 Jan 2023 18:54:44 +0900 Subject: [PATCH 044/112] =?UTF-8?q?Update=20README=5F=E6=97=A5=E6=9C=AC?= =?UTF-8?q?=E8=AA=9E.md=20(#8755)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix markdown style --- "doc/doc_i18n/README_\346\227\245\346\234\254\350\252\236.md" | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git "a/doc/doc_i18n/README_\346\227\245\346\234\254\350\252\236.md" "b/doc/doc_i18n/README_\346\227\245\346\234\254\350\252\236.md" index a75003ec..4c04db8e 100644 --- "a/doc/doc_i18n/README_\346\227\245\346\234\254\350\252\236.md" +++ "b/doc/doc_i18n/README_\346\227\245\346\234\254\350\252\236.md" @@ -89,7 +89,7 @@ paddleocr --image_dir /your/test/image.jpg --lang=japan # change for i18n abbr | 中国語と英語の超軽量 PP-OCRv3 モデル(16.2M) | ch_PP-OCRv3_xx | モバイル & サーバー | [推論モデル](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar) / [トレーニングモデル](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar) | [推論モデル](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar) / [トレーニングモデル](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_train.tar) | -- その他のモデルのダウンロード (多言語を含む) については、[PP-OCR シリーズ モデルのダウンロード] (../doc_en/models_list_en.md)をご参照ください。 +- その他のモデルのダウンロード (多言語を含む) については、[PP-OCR シリーズ モデルのダウンロード](../doc_en/models_list_en.md)をご参照ください。 - 新しい言語のリクエストについては、 [新しい言語_リクエストのガイドライン](#language_requests)を参照してください。 - 構造文書分析モデルについては、[PP-Structure models](../../ppstructure/docs/models_list_en.md)をご参照ください。 -- Gitee From 
5e911afa37da994d0b1eff6548a4b9138886812f Mon Sep 17 00:00:00 2001 From: Dhruv Awasthi Date: Wed, 11 Jan 2023 09:06:56 +0530 Subject: [PATCH 045/112] Fix typo and grammatical error (#8785) --- ppstructure/kie/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ppstructure/kie/README.md b/ppstructure/kie/README.md index 872edb95..d8e18443 100644 --- a/ppstructure/kie/README.md +++ b/ppstructure/kie/README.md @@ -89,7 +89,7 @@ Boxes of different colors in the image represent different categories. The invoice and application form images have three categories: `request`, `answer` and `header`. The `question` and 'answer' can be used to extract the relationship. -For the ID card image, the mdoel can be directly identify the key information such as `name`, `gender`, `nationality`, so that the subsequent relationship extraction process is not required, and the key information extraction task can be completed using only on model. +For the ID card image, the model can directly identify the key information such as `name`, `gender`, `nationality`, so that the subsequent relationship extraction process is not required, and the key information extraction task can be completed using only one model. ### 3.2 RE -- Gitee From ecd29bd28b3fa188efcda06f9e6fe94570613310 Mon Sep 17 00:00:00 2001 From: Dhruv Awasthi Date: Wed, 11 Jan 2023 09:08:32 +0530 Subject: [PATCH 046/112] Fix broken link to install paddlepaddle (#8729) The link provided for installing paddlepaddle doesn't work. Hence, this change updates the broken link to install paddlepaddle for CPU and GPU.
--- doc/doc_en/quickstart_en.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/doc_en/quickstart_en.md b/doc/doc_en/quickstart_en.md index ea38845f..3479e190 100644 --- a/doc/doc_en/quickstart_en.md +++ b/doc/doc_en/quickstart_en.md @@ -28,13 +28,13 @@ - If you have CUDA 9 or CUDA 10 installed on your machine, please run the following command to install ```bash - python3 -m pip install paddlepaddle-gpu -i https://mirror.baidu.com/pypi/simple + python -m pip install paddlepaddle-gpu -i https://pypi.tuna.tsinghua.edu.cn/simple ``` - If you have no available GPU on your machine, please run the following command to install the CPU version ```bash - python3 -m pip install paddlepaddle -i https://mirror.baidu.com/pypi/simple + python -m pip install paddlepaddle -i https://pypi.tuna.tsinghua.edu.cn/simple ``` For more software version requirements, please refer to the instructions in [Installation Document](https://www.paddlepaddle.org.cn/install/quick) for operation. -- Gitee From c811e626895bdce94d6360d2c9835275fcffea85 Mon Sep 17 00:00:00 2001 From: Dhruv Awasthi Date: Wed, 11 Jan 2023 09:09:46 +0530 Subject: [PATCH 047/112] Fix: broken link for whl package documentation. (#8719) This proposed change fixes the broken link under the section `2.4 Parameter Description` in the last line that says: Most of the parameters are consistent with the PaddleOCR whl package, see `whl package documentation`. --- ppstructure/docs/quickstart_en.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ppstructure/docs/quickstart_en.md b/ppstructure/docs/quickstart_en.md index 9229a79d..bbaac342 100644 --- a/ppstructure/docs/quickstart_en.md +++ b/ppstructure/docs/quickstart_en.md @@ -311,7 +311,7 @@ Please refer to: [Key Information Extraction](../kie/README.md) . 
| save_pdf | Whether to convert docx to pdf when recovery| False | | structure_version | Structure version, optional PP-structure and PP-structurev2 | PP-structure | -Most of the parameters are consistent with the PaddleOCR whl package, see [whl package documentation](../../doc/doc_en/whl.md) +Most of the parameters are consistent with the PaddleOCR whl package, see [whl package documentation](../../doc/doc_en/whl_en.md) ## 3. Summary -- Gitee From 303e81b0b44b092ee0d7c7765d46a0952a76d71c Mon Sep 17 00:00:00 2001 From: andyj <87074272+andyjpaddle@users.noreply.github.com> Date: Wed, 11 Jan 2023 15:18:00 +0800 Subject: [PATCH 048/112] [doc] fix inference args default type (#8824) * add en doc * fix dead link & test=document_fix * fix dead link & test=document_fix * update args default type & test=document_fix --- doc/doc_ch/inference_args.md | 4 ++-- doc/doc_en/inference_args_en.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/doc_ch/inference_args.md b/doc/doc_ch/inference_args.md index 24e7223e..890696a4 100644 --- a/doc/doc_ch/inference_args.md +++ b/doc/doc_ch/inference_args.md @@ -88,7 +88,7 @@ PSE算法相关参数如下 | :--: | :--: | :--: | :--: | | rec_algorithm | str | "CRNN" | 文本识别算法名称,目前支持`CRNN`, `SRN`, `RARE`, `NETR`, `SAR`, `ViTSTR`, `ABINet`, `VisionLAN`, `SPIN`, `RobustScanner`, `SVTR`, `SVTR_LCNet` | | rec_model_dir | str | 无,如果使用识别模型,该项是必填项 | 识别inference模型路径 | -| rec_image_shape | list | [3, 48, 320] | 识别时的图像尺寸 | +| rec_image_shape | str | "3,48,320" | 识别时的图像尺寸 | | rec_batch_num | int | 6 | 识别的batch size | | max_text_length | int | 25 | 识别结果最大长度,在`SRN`中有效 | | rec_char_dict_path | str | "./ppocr/utils/ppocr_keys_v1.txt" | 识别的字符字典文件 | @@ -115,7 +115,7 @@ PSE算法相关参数如下 | :--: | :--: | :--: | :--: | | use_angle_cls | bool | False | 是否使用方向分类器 | | cls_model_dir | str | 无,如果需要使用,则必须显式指定路径 | 方向分类器inference模型路径 | -| cls_image_shape | list | [3, 48, 192] | 预测尺度 | +| cls_image_shape | str | "3,48,192" | 预测尺度 | | label_list | list | ['0', '180'] | 
class id对应的角度值 | | cls_batch_num | int | 6 | 方向分类器预测的batch size | | cls_thresh | float | 0.9 | 预测阈值,模型预测结果为180度,且得分大于该阈值时,认为最终预测结果为180度,需要翻转 | diff --git a/doc/doc_en/inference_args_en.md b/doc/doc_en/inference_args_en.md index b28cd843..f0435a18 100644 --- a/doc/doc_en/inference_args_en.md +++ b/doc/doc_en/inference_args_en.md @@ -88,7 +88,7 @@ The relevant parameters of the PSE algorithm are as follows | :--: | :--: | :--: | :--: | | rec_algorithm | str | "CRNN" | Text recognition algorithm name, currently supports `CRNN`, `SRN`, `RARE`, `NETR`, `SAR`, `ViTSTR`, `ABINet`, `VisionLAN`, `SPIN`, `RobustScanner`, `SVTR`, `SVTR_LCNet` | | rec_model_dir | str | None, it is required if using the recognition model | recognition inference model paths | -| rec_image_shape | list | [3, 48, 320] | Image size at the time of recognition | +| rec_image_shape | str | "3,48,320" | Image size at the time of recognition | | rec_batch_num | int | 6 | batch size | | max_text_length | int | 25 | The maximum length of the recognition result, valid in `SRN` | | rec_char_dict_path | str | "./ppocr/utils/ppocr_keys_v1.txt" | character dictionary file | @@ -115,7 +115,7 @@ The relevant parameters of the PSE algorithm are as follows | :--: | :--: | :--: | :--: | | use_angle_cls | bool | False | whether to use an angle classifier | | cls_model_dir | str | None, if you need to use, you must specify the path explicitly | angle classifier inference model path | -| cls_image_shape | list | [3, 48, 192] | prediction shape | +| cls_image_shape | str | "3,48,192" | prediction shape | | label_list | list | ['0', '180'] | The angle value corresponding to the class id | | cls_batch_num | int | 6 | batch size | | cls_thresh | float | 0.9 | Prediction threshold, when the model prediction result is 180 degrees, and the score is greater than the threshold, the final prediction result is considered to be 180 degrees and needs to be flipped | -- Gitee From 15146599791fe5475f4dcc91329b4703d96080f1 Mon Sep
17 00:00:00 2001 From: Evezerest <50011306+Evezerest@users.noreply.github.com> Date: Fri, 13 Jan 2023 15:32:08 +0800 Subject: [PATCH 049/112] Update README.md --- deploy/paddlejs/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/paddlejs/README.md b/deploy/paddlejs/README.md index 8eefc387..6fd442bf 100644 --- a/deploy/paddlejs/README.md +++ b/deploy/paddlejs/README.md @@ -5,7 +5,7 @@ English| [简体中文](README_ch.md) [Paddle.js](https://github.com/PaddlePaddle/Paddle.js) is a web project for Baidu PaddlePaddle, which is an open source deep learning framework running in the browser. Paddle.js can either load a pre-trained model, or transforming a model from paddle-hub with model transforming tools provided by Paddle.js. It could run in every browser with WebGL/WebGPU/WebAssembly supported. It could also run in Baidu Smartprogram and wechat miniprogram. ## Web Demo -Run OCR demo in browser refer to [tutorial](https://github.com/PaddlePaddle/FastDeploy/blob/develop/examples/application/js/WebDemo_en.md). +Run OCR demo in browser refer to [tutorial](https://github.com/PaddlePaddle/FastDeploy/tree/develop/examples/application/js/web_demo). 
|demo|web demo dicrctory|visualization| |-|-|-| -- Gitee From 4aff082c5b6369aa4f966b6e5ccdcc8265076a87 Mon Sep 17 00:00:00 2001 From: Evezerest <50011306+Evezerest@users.noreply.github.com> Date: Fri, 13 Jan 2023 15:32:39 +0800 Subject: [PATCH 050/112] Update README_ch.md --- deploy/paddlejs/README_ch.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/paddlejs/README_ch.md b/deploy/paddlejs/README_ch.md index d6c0f33f..e30a3255 100644 --- a/deploy/paddlejs/README_ch.md +++ b/deploy/paddlejs/README_ch.md @@ -7,7 +7,7 @@ ## Web Demo使用 -在浏览器中直接运行官方OCR demo参考[教程](https://github.com/PaddlePaddle/FastDeploy/blob/develop/examples/application/js/WebDemo.md) +在浏览器中直接运行官方OCR demo参考[教程](https://github.com/PaddlePaddle/FastDeploy/tree/develop/examples/application/js/web_demo) |demo名称|web demo目录|可视化| |-|-|-| -- Gitee From 29cdda4eda0b3867d33b2ce018532ff96ac872b2 Mon Sep 17 00:00:00 2001 From: topduke <784990967@qq.com> Date: Thu, 19 Jan 2023 22:14:31 +0800 Subject: [PATCH 051/112] add svtr FAQ ang data_aug (#8865) * Update rec_nrtr_head.py * add svtr FAQ and data_aug --- configs/rec/rec_svtrnet.yml | 12 ++++--- configs/rec/rec_svtrnet_ch.yml | 4 +-- doc/doc_ch/algorithm_rec_svtr.md | 18 +++++++++- doc/doc_en/algorithm_rec_svtr_en.md | 18 +++++++++- ppocr/data/imaug/__init__.py | 2 +- ppocr/data/imaug/abinet_aug.py | 51 +++++++++++++++++++++++++++++ ppocr/data/imaug/rec_img_aug.py | 40 ++++++++++++++++++++-- 7 files changed, 132 insertions(+), 13 deletions(-) diff --git a/configs/rec/rec_svtrnet.yml b/configs/rec/rec_svtrnet.yml index e8ceefea..82b8273a 100644 --- a/configs/rec/rec_svtrnet.yml +++ b/configs/rec/rec_svtrnet.yml @@ -26,10 +26,10 @@ Optimizer: name: AdamW beta1: 0.9 beta2: 0.99 - epsilon: 8.e-8 + epsilon: 1.e-8 weight_decay: 0.05 no_weight_decay_name: norm pos_embed - one_dim_param_no_weight_decay: true + one_dim_param_no_weight_decay: True lr: name: Cosine learning_rate: 0.0005 @@ -48,7 +48,7 @@ Architecture: Backbone: name: SVTRNet 
img_size: [32, 100] - out_char_num: 25 + out_char_num: 25 # W//4 or W//8 or W/12 out_channels: 192 patch_merging: 'Conv' embed_dim: [64, 128, 256] @@ -57,7 +57,7 @@ Architecture: mixer: ['Local','Local','Local','Local','Local','Local','Global','Global','Global','Global','Global','Global'] local_mixer: [[7, 11], [7, 11], [7, 11]] last_stage: True - prenorm: false + prenorm: False Neck: name: SequenceEncoder encoder_type: reshape @@ -82,6 +82,8 @@ Train: - DecodeImage: # load image img_mode: BGR channel_first: False + - SVTRRecAug: + aug_type: 0 # or 1 - CTCLabelEncode: # Class handling label - SVTRRecResizeImg: image_shape: [3, 64, 256] @@ -92,7 +94,7 @@ Train: shuffle: True batch_size_per_card: 512 drop_last: True - num_workers: 4 + num_workers: 8 Eval: dataset: diff --git a/configs/rec/rec_svtrnet_ch.yml b/configs/rec/rec_svtrnet_ch.yml index 0d3f63d1..597e57fb 100644 --- a/configs/rec/rec_svtrnet_ch.yml +++ b/configs/rec/rec_svtrnet_ch.yml @@ -23,7 +23,7 @@ Optimizer: name: AdamW beta1: 0.9 beta2: 0.99 - epsilon: 8.0e-08 + epsilon: 1.0e-08 weight_decay: 0.05 no_weight_decay_name: norm pos_embed one_dim_param_no_weight_decay: true @@ -40,7 +40,7 @@ Architecture: img_size: - 32 - 320 - out_char_num: 40 + out_char_num: 40 # W//4 or W//8 or W/12 out_channels: 96 patch_merging: Conv embed_dim: diff --git a/doc/doc_ch/algorithm_rec_svtr.md b/doc/doc_ch/algorithm_rec_svtr.md index c0e26433..42a1a9a4 100644 --- a/doc/doc_ch/algorithm_rec_svtr.md +++ b/doc/doc_ch/algorithm_rec_svtr.md @@ -159,7 +159,23 @@ Predicts of ./doc/imgs_words_en/word_10.png:('pain', 0.9999998807907104) ## 5. FAQ -1. 由于`SVTR`使用的算子大多为矩阵相乘,在GPU环境下,速度具有优势,但在CPU开启mkldnn加速环境下,`SVTR`相比于被优化的卷积网络没有优势。 +- 1. GPU和CPU速度对比 + - 由于`SVTR`使用的算子大多为矩阵相乘,在GPU环境下,速度具有优势,但在CPU开启mkldnn加速环境下,`SVTR`相比于被优化的卷积网络没有优势。 +- 2. SVTR模型转ONNX失败 + - 保证`paddle2onnx`和`onnxruntime`版本最新,转onnx命令参考[SVTR模型转onnx步骤实例](https://github.com/PaddlePaddle/PaddleOCR/issues/7821#issuecomment-1271214273)。 +- 3. 
SVTR转ONNX成功但是推理结果不正确 + - 可能的原因模型参数`out_char_num`设置不正确,应设置为W//4、W//8或者W//12,可以参考[高精度中文场景文本识别模型SVTR的3.3.3章节](https://aistudio.baidu.com/aistudio/projectdetail/5073182?contributionType=1)。 +- 4. 长文本识别优化 + - 参考[高精度中文场景文本识别模型SVTR的3.3章节](https://aistudio.baidu.com/aistudio/projectdetail/5073182?contributionType=1)。 +- 5. 论文结果复现注意事项 + - 数据集使用[ABINet](https://github.com/FangShancheng/ABINet)提供的数据集; + - 默认使用4卡GPU训练,单卡Batchsize默认为512,总Batchsize为2048,对应的学习率为0.0005,当修改Batchsize或者改变GPU卡数,学习率应等比例修改。 +- 6. 进一步优化的探索点 + - 学习率调整:可以调整为默认的两倍保持Batchsize不变;或者将Batchsize减小为默认的1/2,保持学习率不变; + - 数据增强策略:可选`RecConAug`和`RecAug`; + - 如果不使用STN时,可以将`mixer`的`Local`替换为`Conv`、`local_mixer`全部修改为`[5, 5]`; + - 网格搜索最优的`embed_dim`、`depth`、`num_heads`配置; + - 使用`后Normalization策略`,即是将模型配置`prenorm`修改为`True`。 ## 引用 diff --git a/doc/doc_en/algorithm_rec_svtr_en.md b/doc/doc_en/algorithm_rec_svtr_en.md index 37cd35f3..d22fe73e 100644 --- a/doc/doc_en/algorithm_rec_svtr_en.md +++ b/doc/doc_en/algorithm_rec_svtr_en.md @@ -130,7 +130,23 @@ Not supported ## 5. FAQ -1. Since most of the operators used by `SVTR` are matrix multiplication, in the GPU environment, the speed has an advantage, but in the environment where mkldnn is enabled on the CPU, `SVTR` has no advantage over the optimized convolutional network. +- 1. Speed situation on CPU and GPU + - Since most of the operators used by `SVTR` are matrix multiplication, in the GPU environment, the speed has an advantage, but in the environment where mkldnn is enabled on the CPU, `SVTR` has no advantage over the optimized convolutional network. +- 2. SVTR model convert to ONNX failed + - Ensure `paddle2onnx` and `onnxruntime` versions are up to date, refer to [SVTR model to onnx step-by-step example](https://github.com/PaddlePaddle/PaddleOCR/issues/7821#issuecomment-1271214273) for the ONNX conversion command. +- 3.
SVTR model convert to ONNX is successful but the inference result is incorrect + - The possible reason is that the model parameter `out_char_num` is not set correctly, it should be set to W//4, W//8 or W//12, please refer to [Section 3.3.3 of SVTR, a high-precision Chinese scene text recognition model](https://aistudio.baidu.com/aistudio/projectdetail/5073182?contributionType=1). +- 4. Optimization of long text recognition + - Refer to [Section 3.3 of SVTR, a high-precision Chinese scene text recognition model](https://aistudio.baidu.com/aistudio/projectdetail/5073182?contributionType=1). +- 5. Notes on the reproduction of the paper results + - Use the dataset provided by [ABINet](https://github.com/FangShancheng/ABINet). + - By default, 4 cards of GPUs are used for training, the default Batchsize of a single card is 512, and the total Batchsize is 2048, corresponding to a learning rate of 0.0005. When modifying the Batchsize or changing the number of GPU cards, the learning rate should be modified in equal proportion. +- 6. Exploration Directions for further optimization + - Learning rate adjustment: adjusting to twice the default to keep Batchsize unchanged; or reducing Batchsize to 1/2 the default to keep the learning rate unchanged. + - Data augmentation strategies: optionally `RecConAug` and `RecAug`. + - If STN is not used, `Local` of `mixer` can be replaced by `Conv` and `local_mixer` can all be modified to `[5, 5]`. + - Grid search for optimal `embed_dim`, `depth`, `num_heads` configurations. + - Use the `Post-Normalization strategy`, which is to modify the model configuration `prenorm` to `True`.
## Citation diff --git a/ppocr/data/imaug/__init__.py b/ppocr/data/imaug/__init__.py index 93d97446..121582b4 100644 --- a/ppocr/data/imaug/__init__.py +++ b/ppocr/data/imaug/__init__.py @@ -27,7 +27,7 @@ from .make_pse_gt import MakePseGt from .rec_img_aug import BaseDataAugmentation, RecAug, RecConAug, RecResizeImg, ClsResizeImg, \ SRNRecResizeImg, GrayRecResizeImg, SARRecResizeImg, PRENResizeImg, \ ABINetRecResizeImg, SVTRRecResizeImg, ABINetRecAug, VLRecResizeImg, SPINRecResizeImg, RobustScannerRecResizeImg, \ - RFLRecResizeImg + RFLRecResizeImg, SVTRRecAug from .ssl_img_aug import SSLRotateResize from .randaugment import RandAugment from .copy_paste import CopyPaste diff --git a/ppocr/data/imaug/abinet_aug.py b/ppocr/data/imaug/abinet_aug.py index eefdc75d..1b93751b 100644 --- a/ppocr/data/imaug/abinet_aug.py +++ b/ppocr/data/imaug/abinet_aug.py @@ -405,3 +405,54 @@ class CVColorJitter(object): def __call__(self, img): if random.random() < self.p: return self.transforms(img) else: return img + + +class SVTRDeterioration(object): + def __init__(self, var, degrees, factor, p=0.5): + self.p = p + transforms = [] + if var is not None: + transforms.append(CVGaussianNoise(var=var)) + if degrees is not None: + transforms.append(CVMotionBlur(degrees=degrees)) + if factor is not None: + transforms.append(CVRescale(factor=factor)) + self.transforms = transforms + + def __call__(self, img): + if random.random() < self.p: + random.shuffle(self.transforms) + transforms = Compose(self.transforms) + return transforms(img) + else: + return img + + +class SVTRGeometry(object): + def __init__(self, + aug_type=0, + degrees=15, + translate=(0.3, 0.3), + scale=(0.5, 2.), + shear=(45, 15), + distortion=0.5, + p=0.5): + self.aug_type = aug_type + self.p = p + self.transforms = [] + self.transforms.append(CVRandomRotation(degrees=degrees)) + self.transforms.append(CVRandomAffine( + degrees=degrees, translate=translate, scale=scale, shear=shear)) + 
self.transforms.append(CVRandomPerspective(distortion=distortion)) + + def __call__(self, img): + if random.random() < self.p: + if self.aug_type: + random.shuffle(self.transforms) + transforms = Compose(self.transforms[:random.randint(1, 3)]) + img = transforms(img) + else: + img = self.transforms[random.randint(0, 2)](img) + return img + else: + return img \ No newline at end of file diff --git a/ppocr/data/imaug/rec_img_aug.py b/ppocr/data/imaug/rec_img_aug.py index e22153bd..48404ab8 100644 --- a/ppocr/data/imaug/rec_img_aug.py +++ b/ppocr/data/imaug/rec_img_aug.py @@ -19,7 +19,7 @@ import random import copy from PIL import Image from .text_image_aug import tia_perspective, tia_stretch, tia_distort -from .abinet_aug import CVGeometry, CVDeterioration, CVColorJitter +from .abinet_aug import CVGeometry, CVDeterioration, CVColorJitter, SVTRGeometry, SVTRDeterioration from paddle.vision.transforms import Compose @@ -109,8 +109,9 @@ class ABINetRecAug(object): scale=(0.5, 2.), shear=(45, 15), distortion=0.5, - p=geometry_p), CVDeterioration( - var=20, degrees=6, factor=4, p=deterioration_p), + p=geometry_p), + CVDeterioration( + var=20, degrees=6, factor=4, p=deterioration_p), CVColorJitter( brightness=0.5, contrast=0.5, @@ -169,6 +170,39 @@ class RecConAug(object): return data +class SVTRRecAug(object): + def __init__(self, + aug_type=0, + geometry_p=0.5, + deterioration_p=0.25, + colorjitter_p=0.25, + **kwargs): + self.transforms = Compose([ + SVTRGeometry( + aug_type=aug_type, + degrees=45, + translate=(0.0, 0.0), + scale=(0.5, 2.), + shear=(45, 15), + distortion=0.5, + p=geometry_p), + SVTRDeterioration( + var=20, degrees=6, factor=4, p=deterioration_p), + CVColorJitter( + brightness=0.5, + contrast=0.5, + saturation=0.5, + hue=0.1, + p=colorjitter_p) + ]) + + def __call__(self, data): + img = data['image'] + img = self.transforms(img) + data['image'] = img + return data + + class ClsResizeImg(object): def __init__(self, image_shape, **kwargs): self.image_shape 
= image_shape -- Gitee From 8de2ebf25895a8e37c007e4662eec4423fc2a7d4 Mon Sep 17 00:00:00 2001 From: Koutaro Mukai Date: Sun, 29 Jan 2023 17:50:23 +0900 Subject: [PATCH 052/112] fix mismatch between ch and en (#8876) --- doc/doc_en/whl_en.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/doc_en/whl_en.md b/doc/doc_en/whl_en.md index 5628dc3f..5283391e 100644 --- a/doc/doc_en/whl_en.md +++ b/doc/doc_en/whl_en.md @@ -335,7 +335,7 @@ ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to downlo img_path = 'PaddleOCR/doc/imgs/11.jpg' img = cv2.imread(img_path) # img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY), If your own training model supports grayscale images, you can uncomment this line -result = ocr.ocr(img_path, cls=True) +result = ocr.ocr(img, cls=True) for idx in range(len(result)): res = result[idx] for line in res: -- Gitee From a9109b9921dff68ac67cbea9aad519e28464cab2 Mon Sep 17 00:00:00 2001 From: math345 Date: Mon, 30 Jan 2023 16:10:44 +0800 Subject: [PATCH 053/112] fix bug: the index j can not skip 0. 
(#8918) --- tools/infer/predict_system.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/infer/predict_system.py b/tools/infer/predict_system.py index 19e0525a..1f9e2e1d 100755 --- a/tools/infer/predict_system.py +++ b/tools/infer/predict_system.py @@ -123,7 +123,7 @@ def sorted_boxes(dt_boxes): _boxes = list(sorted_boxes) for i in range(num_boxes - 1): - for j in range(i, 0, -1): + for j in range(i, -1, -1): if abs(_boxes[j + 1][0][1] - _boxes[j][0][1]) < 10 and \ (_boxes[j + 1][0][0] < _boxes[j][0][0]): tmp = _boxes[j] -- Gitee From 49fef4229266ccb731f1aa97b5649d7f5b250a73 Mon Sep 17 00:00:00 2001 From: jaycecd <33342388+jaycecd@users.noreply.github.com> Date: Mon, 30 Jan 2023 16:19:45 +0800 Subject: [PATCH 054/112] =?UTF-8?q?=E6=96=B0=E5=A2=9Egpu=5Fid=E5=8F=82?= =?UTF-8?q?=E6=95=B0=EF=BC=8C=E5=8F=AF=E4=BB=A5=E5=9C=A8=E6=8E=A8=E7=90=86?= =?UTF-8?q?=E6=97=B6=E4=BD=BF=E7=94=A8=E7=89=B9=E5=AE=9AGPU=E8=BF=9B?= =?UTF-8?q?=E8=A1=8C=E6=8E=A8=E7=90=86=20(#8920)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tools/infer/utility.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/infer/utility.py b/tools/infer/utility.py index 207b2e5f..83451e94 100644 --- a/tools/infer/utility.py +++ b/tools/infer/utility.py @@ -42,6 +42,7 @@ def init_args(): parser.add_argument("--min_subgraph_size", type=int, default=15) parser.add_argument("--precision", type=str, default="fp32") parser.add_argument("--gpu_mem", type=int, default=500) + parser.add_argument("--gpu_id", type=int, default=0) # params for text detector parser.add_argument("--image_dir", type=str) @@ -219,7 +220,7 @@ def create_predictor(args, mode, logger): logger.warning( "GPU is not found in current device by nvidia-smi. Please check your device or ignore it if run on jetson." 
) - config.enable_use_gpu(args.gpu_mem, 0) + config.enable_use_gpu(args.gpu_mem, args.gpu_id) if args.use_tensorrt: config.enable_tensorrt_engine( workspace_size=1 << 30, -- Gitee From 133d67f27dc8a241d6b2e30a9f047a0fb75bebbe Mon Sep 17 00:00:00 2001 From: Milsk01 <63583351+Milsk01@users.noreply.github.com> Date: Mon, 30 Jan 2023 16:41:27 +0800 Subject: [PATCH 055/112] Fix model download link in documentation (#8569) * Fix model download link in algorithm_overview_en * Fix model download link --- doc/doc_en/algorithm_overview_en.md | 2 +- doc/doc_en/algorithm_rec_vitstr_en.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/doc_en/algorithm_overview_en.md b/doc/doc_en/algorithm_overview_en.md index fad0fb8a..ee47244f 100755 --- a/doc/doc_en/algorithm_overview_en.md +++ b/doc/doc_en/algorithm_overview_en.md @@ -99,7 +99,7 @@ Refer to [DTRB](https://arxiv.org/abs/1904.01906), the training and evaluation r |SAR|Resnet31| 87.20% | rec_r31_sar | [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/rec/rec_r31_sar_train.tar) | |SEED|Aster_Resnet| 85.35% | rec_resnet_stn_bilstm_att | [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/rec/rec_resnet_stn_bilstm_att.tar) | |SVTR|SVTR-Tiny| 89.25% | rec_svtr_tiny_none_ctc_en | [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/rec_svtr_tiny_none_ctc_en_train.tar) | -|ViTSTR|ViTSTR| 79.82% | rec_vitstr_none_ce | [trained model](https://paddleocr.bj.bcebos.com/rec_vitstr_none_none_train.tar) | +|ViTSTR|ViTSTR| 79.82% | rec_vitstr_none_ce | [trained model](https://paddleocr.bj.bcebos.com/rec_vitstr_none_ce_train.tar) | |ABINet|Resnet45| 90.75% | rec_r45_abinet | [trained model](https://paddleocr.bj.bcebos.com/rec_r45_abinet_train.tar) | |VisionLAN|Resnet45| 90.30% | rec_r45_visionlan | [trained model](https://paddleocr.bj.bcebos.com/VisionLAN/rec_r45_visionlan_train.tar) | |SPIN|ResNet32| 90.00% | rec_r32_gaspin_bilstm_att | [trained 
model](https://paddleocr.bj.bcebos.com/contribution/rec_r32_gaspin_bilstm_att.tar) | diff --git a/doc/doc_en/algorithm_rec_vitstr_en.md b/doc/doc_en/algorithm_rec_vitstr_en.md index a6f9e2f1..7d5597f6 100644 --- a/doc/doc_en/algorithm_rec_vitstr_en.md +++ b/doc/doc_en/algorithm_rec_vitstr_en.md @@ -25,7 +25,7 @@ Using MJSynth and SynthText two text recognition datasets for training, and eval |Model|Backbone|config|Acc|Download link| | --- | --- | --- | --- | --- | -|ViTSTR|ViTSTR|[rec_vitstr_none_ce.yml](../../configs/rec/rec_vitstr_none_ce.yml)|79.82%|[trained model](https://paddleocr.bj.bcebos.com/rec_vitstr_none_none_train.tar)| +|ViTSTR|ViTSTR|[rec_vitstr_none_ce.yml](../../configs/rec/rec_vitstr_none_ce.yml)|79.82%|[trained model](https://paddleocr.bj.bcebos.com/rec_vitstr_none_ce_train.tar)| ## 2. Environment -- Gitee From b20014e89a2ae6b632b2cb2cf8d1910e80f74e07 Mon Sep 17 00:00:00 2001 From: Double_V Date: Tue, 31 Jan 2023 11:32:18 +0800 Subject: [PATCH 056/112] fix doc --- doc/doc_en/models_list_en.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/doc_en/models_list_en.md b/doc/doc_en/models_list_en.md index 3ec5013c..168d63d3 100644 --- a/doc/doc_en/models_list_en.md +++ b/doc/doc_en/models_list_en.md @@ -52,7 +52,7 @@ Relationship of the above models is as follows. 
|model name|description|config|model size|download| | --- | --- | --- | --- | --- | |en_PP-OCRv3_det_slim | [New] Slim qunatization with distillation lightweight detection model, supporting English | [ch_PP-OCRv3_det_cml.yml](../../configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml) | 1.1M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_slim_distill_train.tar) / [nb model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_slim_infer.nb) | -|ch_PP-OCRv3_det | [New] Original lightweight detection model, supporting English |[ch_PP-OCRv3_det_cml.yml](../../configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml)| 3.8M | [inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_distill_train.tar) | +|en_PP-OCRv3_det | [New] Original lightweight detection model, supporting English |[ch_PP-OCRv3_det_cml.yml](../../configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml)| 3.8M | [inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_distill_train.tar) | * Note: English configuration file is same as Chinese except training data, here we only provide one configuration file. 
-- Gitee From 0ed9d8889f7ba90949e64fa6e3240df282983a77 Mon Sep 17 00:00:00 2001 From: Evezerest <50011306+Evezerest@users.noreply.github.com> Date: Fri, 3 Feb 2023 14:44:57 +0800 Subject: [PATCH 057/112] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8af32c87..006fc48f 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ PaddleOCR aims to create multilingual, awesome, leading, and practical OCR tools ## 📣 Recent updates -- 🔨**2022.11 Add implementation of [4 cutting-edge algorithms](doc/doc_ch/algorithm_overview.md)**:Text Detection [DRRG](doc/doc_en/algorithm_det_drrg_en.md), Text Recognition [RFL](./doc/doc_en/algorithm_rec_rfl_en.md), Image Super-Resolution [Text Telescope](doc/doc_en/algorithm_sr_telescope_en.md),Handwrittem Mathematical Expression Recognition [CAN](doc/doc_en/algorithm_rec_can_en.md) +- 🔨**2022.11 Add implementation of [4 cutting-edge algorithms](doc/doc_ch/algorithm_overview.md)**:Text Detection [DRRG](doc/doc_en/algorithm_det_drrg_en.md), Text Recognition [RFL](./doc/doc_en/algorithm_rec_rfl_en.md), Image Super-Resolution [Text Telescope](doc/doc_en/algorithm_sr_telescope_en.md),Handwritten Mathematical Expression Recognition [CAN](doc/doc_en/algorithm_rec_can_en.md) - **2022.10 Release [optimized JS version PP-OCRv3 model](./deploy/paddlejs/README.md)** with 4.3M model size, 8x faster inference time, and a ready-to-use web demo - 💥 **Live Playback: Introduction to PP-StructureV2 optimization strategy**. Scan [the QR code below](#Community) using WeChat, follow the PaddlePaddle official account and fill out the questionnaire to join the WeChat group, get the live link and 20G OCR learning materials (including PDF2Word application, 10 models in vertical scenarios, etc.) 
-- Gitee From 14adbd09f2ab768e14ba7e9b5564b21ae776702a Mon Sep 17 00:00:00 2001 From: topduke <784990967@qq.com> Date: Tue, 7 Feb 2023 14:12:06 +0800 Subject: [PATCH 058/112] fix piecewise boundaries and droppath dtype (#8990) * fix tipc piecewise boundaries and droppath dtype * fix piecewise boundaries and droppath dtype --- configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml | 2 +- configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml | 2 +- configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml | 2 +- configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_enhanced_ctc_loss.yml | 2 +- configs/rec/rec_resnet_stn_bilstm_att.yml | 4 ++-- ppocr/modeling/backbones/rec_svtrnet.py | 2 +- .../configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml | 2 +- .../configs/ch_PP-OCRv3_rec/ch_PP-OCRv3_rec_distillation.yml | 2 +- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml b/configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml index 42725573..afa012b6 100644 --- a/configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml +++ b/configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml @@ -27,7 +27,7 @@ Optimizer: beta2: 0.999 lr: name: Piecewise - decay_epochs : [700, 800] + decay_epochs : [700] values : [0.0005, 0.00005] warmup_epoch: 5 regularizer: diff --git a/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml b/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml index e2aa5010..793bb5c9 100644 --- a/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml +++ b/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml @@ -27,7 +27,7 @@ Optimizer: beta2: 0.999 lr: name: Piecewise - decay_epochs : [700, 800] + decay_epochs : [700] values : [0.001, 0.0001] warmup_epoch: 5 regularizer: diff --git a/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml b/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml index ab48b997..dddfc331 100644 --- a/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml +++ b/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml @@ -27,7 
+27,7 @@ Optimizer: beta2: 0.999 lr: name: Piecewise - decay_epochs : [700, 800] + decay_epochs : [700] values : [0.001, 0.0001] warmup_epoch: 5 regularizer: diff --git a/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_enhanced_ctc_loss.yml b/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_enhanced_ctc_loss.yml index 5be96969..ef0e893f 100644 --- a/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_enhanced_ctc_loss.yml +++ b/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_enhanced_ctc_loss.yml @@ -27,7 +27,7 @@ Optimizer: beta2: 0.999 lr: name: Piecewise - decay_epochs : [700, 800] + decay_epochs : [700] values : [0.001, 0.0001] warmup_epoch: 5 regularizer: diff --git a/configs/rec/rec_resnet_stn_bilstm_att.yml b/configs/rec/rec_resnet_stn_bilstm_att.yml index 0bb90b35..20ed9533 100644 --- a/configs/rec/rec_resnet_stn_bilstm_att.yml +++ b/configs/rec/rec_resnet_stn_bilstm_att.yml @@ -1,6 +1,6 @@ Global: use_gpu: True - epoch_num: 400 + epoch_num: 6 log_smooth_window: 20 print_batch_step: 10 save_model_dir: ./output/rec/seed @@ -27,7 +27,7 @@ Optimizer: momentum: 0.9 lr: name: Piecewise - decay_epochs: [4,5,8] + decay_epochs: [4, 5] values: [1.0, 0.1, 0.01] regularizer: name: 'L2' diff --git a/ppocr/modeling/backbones/rec_svtrnet.py b/ppocr/modeling/backbones/rec_svtrnet.py index c2c07f44..de5920c6 100644 --- a/ppocr/modeling/backbones/rec_svtrnet.py +++ b/ppocr/modeling/backbones/rec_svtrnet.py @@ -32,7 +32,7 @@ def drop_path(x, drop_prob=0., training=False): """ if drop_prob == 0. 
or not training: return x - keep_prob = paddle.to_tensor(1 - drop_prob) + keep_prob = paddle.to_tensor(1 - drop_prob, dtype=x.dtype) shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1) random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype) random_tensor = paddle.floor(random_tensor) # binarize diff --git a/test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml b/test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml index 3eb82d42..43e14b84 100644 --- a/test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml +++ b/test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml @@ -27,7 +27,7 @@ Optimizer: beta2: 0.999 lr: name: Piecewise - decay_epochs : [700, 800] + decay_epochs : [700] values : [0.001, 0.0001] warmup_epoch: 5 regularizer: diff --git a/test_tipc/configs/ch_PP-OCRv3_rec/ch_PP-OCRv3_rec_distillation.yml b/test_tipc/configs/ch_PP-OCRv3_rec/ch_PP-OCRv3_rec_distillation.yml index 4c8ba0a6..8150d95e 100644 --- a/test_tipc/configs/ch_PP-OCRv3_rec/ch_PP-OCRv3_rec_distillation.yml +++ b/test_tipc/configs/ch_PP-OCRv3_rec/ch_PP-OCRv3_rec_distillation.yml @@ -27,7 +27,7 @@ Optimizer: beta2: 0.999 lr: name: Piecewise - decay_epochs : [700, 800] + decay_epochs : [700] values : [0.0005, 0.00005] warmup_epoch: 5 regularizer: -- Gitee From 7ce0a5fc2ebd6331fa2f71ef11ed0e132c715a6c Mon Sep 17 00:00:00 2001 From: MissPenguin Date: Tue, 7 Feb 2023 14:35:20 +0800 Subject: [PATCH 059/112] Update algorithm_overview.md --- doc/doc_ch/algorithm_overview.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/doc_ch/algorithm_overview.md b/doc/doc_ch/algorithm_overview.md index 02a4cbad..49637099 100755 --- a/doc/doc_ch/algorithm_overview.md +++ b/doc/doc_ch/algorithm_overview.md @@ -129,7 +129,7 @@ PaddleOCR将**持续新增**支持OCR领域前沿算法与模型,**欢迎广 ### 1.4 公式识别算法 已支持的公式识别算法列表(戳链接获取使用教程): -- [x] [CAN](./algorithm_rec_can.md.md) +- [x] [CAN](./algorithm_rec_can.md) 在CROHME手写公式数据集上,算法效果如下: -- Gitee From 
d78b945608a6599e0b97101ce9f31e2d5ca89469 Mon Sep 17 00:00:00 2001 From: MissPenguin Date: Tue, 7 Feb 2023 14:38:26 +0800 Subject: [PATCH 060/112] Update algorithm_overview_en.md --- doc/doc_en/algorithm_overview_en.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/doc_en/algorithm_overview_en.md b/doc/doc_en/algorithm_overview_en.md index ee47244f..be05dd91 100755 --- a/doc/doc_en/algorithm_overview_en.md +++ b/doc/doc_en/algorithm_overview_en.md @@ -127,7 +127,7 @@ On the TextZoom public dataset, the effect of the algorithm is as follows: Supported formula recognition algorithms (Click the link to get the tutorial): -- [x] [CAN](./algorithm_rec_can.md.md) +- [x] [CAN](./algorithm_rec_can_en.md) On the CROHME handwritten formula dataset, the effect of the algorithm is as follows: -- Gitee From 0850586667308d38e113447e8d095e955092fe53 Mon Sep 17 00:00:00 2001 From: xiaoting <31891223+tink2123@users.noreply.github.com> Date: Tue, 14 Feb 2023 16:21:24 +0800 Subject: [PATCH 061/112] rename tipc svtr name (#9072) --- .../configs/ch_PP-OCRv3_rec/ch_PP-OCRv3_rec_distillation.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test_tipc/configs/ch_PP-OCRv3_rec/ch_PP-OCRv3_rec_distillation.yml b/test_tipc/configs/ch_PP-OCRv3_rec/ch_PP-OCRv3_rec_distillation.yml index 8150d95e..b61e5e46 100644 --- a/test_tipc/configs/ch_PP-OCRv3_rec/ch_PP-OCRv3_rec_distillation.yml +++ b/test_tipc/configs/ch_PP-OCRv3_rec/ch_PP-OCRv3_rec_distillation.yml @@ -45,7 +45,7 @@ Architecture: freeze_params: false return_all_feats: true model_type: *model_type - algorithm: SVTR + algorithm: SVTR_LCNet Transform: Backbone: name: MobileNetV1Enhance @@ -72,7 +72,7 @@ Architecture: freeze_params: false return_all_feats: true model_type: *model_type - algorithm: SVTR + algorithm: SVTR_LCNet Transform: Backbone: name: MobileNetV1Enhance -- Gitee From a923f35de57b5e378f8dd16e54d0a3e4f51267fd Mon Sep 17 00:00:00 2001 From: Double_V Date: Wed, 15 Feb 2023 10:37:40 
+0800 Subject: [PATCH 062/112] delte unused params (#9078) --- ppocr/modeling/heads/det_db_head.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/ppocr/modeling/heads/det_db_head.py b/ppocr/modeling/heads/det_db_head.py index a686ae5a..77cb6f1d 100644 --- a/ppocr/modeling/heads/det_db_head.py +++ b/ppocr/modeling/heads/det_db_head.py @@ -31,7 +31,7 @@ def get_bias_attr(k): class Head(nn.Layer): - def __init__(self, in_channels, name_list, kernel_list=[3, 2, 2], **kwargs): + def __init__(self, in_channels, kernel_list=[3, 2, 2], **kwargs): super(Head, self).__init__() self.conv1 = nn.Conv2D( @@ -93,16 +93,8 @@ class DBHead(nn.Layer): def __init__(self, in_channels, k=50, **kwargs): super(DBHead, self).__init__() self.k = k - binarize_name_list = [ - 'conv2d_56', 'batch_norm_47', 'conv2d_transpose_0', 'batch_norm_48', - 'conv2d_transpose_1', 'binarize' - ] - thresh_name_list = [ - 'conv2d_57', 'batch_norm_49', 'conv2d_transpose_2', 'batch_norm_50', - 'conv2d_transpose_3', 'thresh' - ] - self.binarize = Head(in_channels, binarize_name_list, **kwargs) - self.thresh = Head(in_channels, thresh_name_list, **kwargs) + self.binarize = Head(in_channels, **kwargs) + self.thresh = Head(in_channels, **kwargs) def step_function(self, x, y): return paddle.reciprocal(1 + paddle.exp(-self.k * (x - y))) -- Gitee From b61ee4706602022e741d7b70086cc27e6e8726a7 Mon Sep 17 00:00:00 2001 From: andyj <87074272+andyjpaddle@users.noreply.github.com> Date: Thu, 23 Feb 2023 16:31:26 +0800 Subject: [PATCH 063/112] [doc] Fix doc (#9203) * add en doc * fix dead link & test=document_fix * fix dead link & test=document_fix * update args default type & test=document_fix * fix doc & test=document_fix --- doc/doc_ch/quickstart.md | 2 +- doc/doc_en/quickstart_en.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/doc_ch/quickstart.md b/doc/doc_ch/quickstart.md index cdae287f..189517a4 100644 --- a/doc/doc_ch/quickstart.md +++ 
b/doc/doc_ch/quickstart.md @@ -211,7 +211,7 @@ from paddleocr import PaddleOCR, draw_ocr # Paddleocr目前支持的多语言语种可以通过修改lang参数进行切换 # 例如`ch`, `en`, `fr`, `german`, `korean`, `japan` -ocr = PaddleOCR(use_angle_cls=True, lang="ch", page_num=2) # need to run only once to download and load model into memory +ocr = PaddleOCR(use_angle_cls=True, lang="ch", page_num=2) # need to run only once to download and load model into memory img_path = './xxx.pdf' result = ocr.ocr(img_path, cls=True) for idx in range(len(result)): diff --git a/doc/doc_en/quickstart_en.md b/doc/doc_en/quickstart_en.md index 3479e190..fecc34e8 100644 --- a/doc/doc_en/quickstart_en.md +++ b/doc/doc_en/quickstart_en.md @@ -223,7 +223,7 @@ from paddleocr import PaddleOCR, draw_ocr # Paddleocr supports Chinese, English, French, German, Korean and Japanese. # You can set the parameter `lang` as `ch`, `en`, `fr`, `german`, `korean`, `japan` # to switch the language model in order. -ocr = PaddleOCR(use_angle_cls=True, lang="ch", page_num=2) # need to run only once to download and load model into memory +ocr = PaddleOCR(use_angle_cls=True, lang="ch", page_num=2) # need to run only once to download and load model into memory img_path = './xxx.pdf' result = ocr.ocr(img_path, cls=True) for idx in range(len(result)): -- Gitee From e904b54c52ae4baea5ed02caf90a085218f12b8d Mon Sep 17 00:00:00 2001 From: Evezerest <50011306+Evezerest@users.noreply.github.com> Date: Fri, 24 Feb 2023 17:04:15 +0800 Subject: [PATCH 064/112] Update README_ch.md --- PPOCRLabel/README_ch.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PPOCRLabel/README_ch.md b/PPOCRLabel/README_ch.md index 5359afc4..8f8517f8 100644 --- a/PPOCRLabel/README_ch.md +++ b/PPOCRLabel/README_ch.md @@ -139,7 +139,7 @@ pip3 install dist/PPOCRLabel-0.0.0-py2.py3-none-any.whl -i https://mirror.baidu. 4. 标注表格结构:**在外部Excel软件中,将存在文字的单元格标记为任意标识符(如 `1` )**,保证Excel中的单元格合并情况与原图相同即可(即不需要Excel中的单元格文字与图片中的文字完全相同) -5. 
导出JSON格式:关闭所有表格图像对应的Excel,点击 `文件`-`导出表格JSON标注` 获得JSON标注结果。 +5. 导出JSON格式:关闭所有表格图像对应的Excel,点击 `文件`-`导出表格标注`,生成gt.txt标注文件。 ### 2.3 注意 -- Gitee From 04457a80430c4c8823b98208fc3b07b3cd95bb8b Mon Sep 17 00:00:00 2001 From: Evezerest <50011306+Evezerest@users.noreply.github.com> Date: Fri, 24 Feb 2023 17:15:02 +0800 Subject: [PATCH 065/112] Update README_ch.md --- PPOCRLabel/README_ch.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PPOCRLabel/README_ch.md b/PPOCRLabel/README_ch.md index 8f8517f8..665d0560 100644 --- a/PPOCRLabel/README_ch.md +++ b/PPOCRLabel/README_ch.md @@ -126,7 +126,7 @@ pip3 install dist/PPOCRLabel-0.0.0-py2.py3-none-any.whl -i https://mirror.baidu. 9. 删除:点击 “删除图像”,图片将会被删除至回收站。 10. 导出结果:用户可以通过菜单中“文件-导出标记结果”手动导出,同时也可以点击“文件 - 自动导出标记结果”开启自动导出。手动确认过的标记将会被存放在所打开图片文件夹下的*Label.txt*中。在菜单栏点击 “文件” - "导出识别结果"后,会将此类图片的识别训练数据保存在*crop_img*文件夹下,识别标签保存在*rec_gt.txt*中[4]。 -### 2.2 表格标注 +### 2.2 表格标注([视频演示](https://www.bilibili.com/video/BV1wR4y1v7JE/?share_source=copy_web&vd_source=cf1f9d24648d49636e3d109c9f9a377d&t=1998)) 表格标注针对表格的结构化提取,将图片中的表格转换为Excel格式,因此标注时需要配合外部软件打开Excel同时完成。在PPOCRLabel软件中完成表格中的文字信息标注(文字与位置)、在Excel文件中完成表格结构信息标注,推荐的步骤为: 1. 表格识别:打开表格图片后,点击软件右上角 `表格识别` 按钮,软件调用PP-Structure中的表格识别模型,自动为表格打标签,同时弹出Excel -- Gitee From 366ad29d6c202a79bad103c72556c1186915c9c8 Mon Sep 17 00:00:00 2001 From: Evezerest <50011306+Evezerest@users.noreply.github.com> Date: Fri, 24 Feb 2023 17:20:48 +0800 Subject: [PATCH 066/112] Update README.md --- PPOCRLabel/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PPOCRLabel/README.md b/PPOCRLabel/README.md index ec933a83..399debbd 100644 --- a/PPOCRLabel/README.md +++ b/PPOCRLabel/README.md @@ -157,7 +157,7 @@ labeling in the Excel file, the recommended steps are: 4. 
***Adjust cell order:*** Click on the menu `View` - `Show Box Number` to show the box ordinal numbers, and drag all the results under the 'Recognition Results' column on the right side of the software interface to make the box numbers are arranged from left to right, top to bottom -5. Export JSON format annotation: close all Excel files corresponding to table images, click `File`-`Export table JSON annotation` to obtain JSON annotation results. +5. Export JSON format annotation: close all Excel files corresponding to table images, click `File-Export Table Label` to obtain `gt.txt` annotation results. ### 2.3 Note -- Gitee From d8d76d59c90d922782a4c812f9c720ac77877d5a Mon Sep 17 00:00:00 2001 From: Mohamad Mansour Date: Fri, 3 Mar 2023 05:11:55 +0200 Subject: [PATCH 067/112] Update README.md (#9176) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 006fc48f..1a9f5d4e 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ PaddleOCR aims to create multilingual, awesome, leading, and practical OCR tools ## 📣 Recent updates -- 🔨**2022.11 Add implementation of [4 cutting-edge algorithms](doc/doc_ch/algorithm_overview.md)**:Text Detection [DRRG](doc/doc_en/algorithm_det_drrg_en.md), Text Recognition [RFL](./doc/doc_en/algorithm_rec_rfl_en.md), Image Super-Resolution [Text Telescope](doc/doc_en/algorithm_sr_telescope_en.md),Handwritten Mathematical Expression Recognition [CAN](doc/doc_en/algorithm_rec_can_en.md) +- 🔨**2022.11 Add implementation of [4 cutting-edge algorithms](doc/doc_en/algorithm_overview_en.md)**:Text Detection [DRRG](doc/doc_en/algorithm_det_drrg_en.md), Text Recognition [RFL](./doc/doc_en/algorithm_rec_rfl_en.md), Image Super-Resolution [Text Telescope](doc/doc_en/algorithm_sr_telescope_en.md),Handwritten Mathematical Expression Recognition [CAN](doc/doc_en/algorithm_rec_can_en.md) - **2022.10 Release [optimized JS version PP-OCRv3 model](./deploy/paddlejs/README.md)** with 4.3M model size, 8x 
faster inference time, and a ready-to-use web demo - 💥 **Live Playback: Introduction to PP-StructureV2 optimization strategy**. Scan [the QR code below](#Community) using WeChat, follow the PaddlePaddle official account and fill out the questionnaire to join the WeChat group, get the live link and 20G OCR learning materials (including PDF2Word application, 10 models in vertical scenarios, etc.) -- Gitee From 15d7ef576561ffa7330ea639324e784f5bc43590 Mon Sep 17 00:00:00 2001 From: Double_V Date: Fri, 10 Mar 2023 10:48:25 +0800 Subject: [PATCH 068/112] Add FastDeploy to Recent updates (#9376) * delte unused params * add fastdeploy to Recent updates --- README_ch.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README_ch.md b/README_ch.md index a54634df..c8acb6fe 100755 --- a/README_ch.md +++ b/README_ch.md @@ -27,6 +27,7 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力 ## 📣 近期更新 +- **🔥2023.3.10 PaddleOCR集成了高性能、全场景模型部署方案FastDeploy,欢迎参考[指南](https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/deploy/fastdeploy)试用(注意使用dygraph分支)。** - 📚**2022.12 发布[《OCR产业范例20讲》电子书](./applications/README.md)**,新增蒙古文、身份证、液晶屏缺陷等**7个场景应用范例** - 🔨**2022.11 新增实现[4种前沿算法](doc/doc_ch/algorithm_overview.md)**:文本检测 [DRRG](doc/doc_ch/algorithm_det_drrg.md), 文本识别 [RFL](doc/doc_ch/algorithm_rec_rfl.md), 文本超分[Text Telescope](doc/doc_ch/algorithm_sr_telescope.md),公式识别[CAN](doc/doc_ch/algorithm_rec_can.md) - **2022.10 优化[JS版PP-OCRv3模型](./deploy/paddlejs/README_ch.md)**:模型大小仅4.3M,预测速度提升8倍,配套web demo开箱即用 -- Gitee From 9c22490f958f42b6c7f88f42374a78addb7e75dd Mon Sep 17 00:00:00 2001 From: Jiang Yuwei Date: Tue, 14 Mar 2023 09:47:43 +0800 Subject: [PATCH 069/112] Fix numpy.bool deprecation problem (#9404) --- ppocr/data/imaug/label_ops.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ppocr/data/imaug/label_ops.py b/ppocr/data/imaug/label_ops.py index 8f56ea10..6ec5da28 100644 --- a/ppocr/data/imaug/label_ops.py +++ b/ppocr/data/imaug/label_ops.py @@ -64,7 +64,7 @@ class 
DetLabelEncode(object): return None boxes = self.expand_points_num(boxes) boxes = np.array(boxes, dtype=np.float32) - txt_tags = np.array(txt_tags, dtype=np.bool) + txt_tags = np.array(txt_tags, dtype=bool) data['polys'] = boxes data['texts'] = txts @@ -218,7 +218,7 @@ class E2ELabelEncodeTest(BaseRecLabelEncode): else: txt_tags.append(False) boxes = np.array(boxes, dtype=np.float32) - txt_tags = np.array(txt_tags, dtype=np.bool) + txt_tags = np.array(txt_tags, dtype=bool) data['polys'] = boxes data['ignore_tags'] = txt_tags temp_texts = [] @@ -254,7 +254,7 @@ class E2ELabelEncodeTrain(object): else: txt_tags.append(False) boxes = np.array(boxes, dtype=np.float32) - txt_tags = np.array(txt_tags, dtype=np.bool) + txt_tags = np.array(txt_tags, dtype=bool) data['polys'] = boxes data['texts'] = txts -- Gitee From 2be9fe763e4ebb55d59cc192eee437b0e09207ce Mon Sep 17 00:00:00 2001 From: xiaoting <31891223+tink2123@users.noreply.github.com> Date: Wed, 15 Mar 2023 17:34:37 +0800 Subject: [PATCH 070/112] rever 'limit maximum resizing ratio' (#9449) --- ppocr/data/imaug/rec_img_aug.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/ppocr/data/imaug/rec_img_aug.py b/ppocr/data/imaug/rec_img_aug.py index 48404ab8..33cd9f17 100644 --- a/ppocr/data/imaug/rec_img_aug.py +++ b/ppocr/data/imaug/rec_img_aug.py @@ -109,9 +109,8 @@ class ABINetRecAug(object): scale=(0.5, 2.), shear=(45, 15), distortion=0.5, - p=geometry_p), - CVDeterioration( - var=20, degrees=6, factor=4, p=deterioration_p), + p=geometry_p), CVDeterioration( + var=20, degrees=6, factor=4, p=deterioration_p), CVColorJitter( brightness=0.5, contrast=0.5, @@ -185,9 +184,8 @@ class SVTRRecAug(object): scale=(0.5, 2.), shear=(45, 15), distortion=0.5, - p=geometry_p), - SVTRDeterioration( - var=20, degrees=6, factor=4, p=deterioration_p), + p=geometry_p), SVTRDeterioration( + var=20, degrees=6, factor=4, p=deterioration_p), CVColorJitter( brightness=0.5, contrast=0.5, @@ -572,7 +570,7 @@ def 
resize_norm_img_chinese(img, image_shape): max_wh_ratio = imgW * 1.0 / imgH h, w = img.shape[0], img.shape[1] ratio = w * 1.0 / h - max_wh_ratio = min(max(max_wh_ratio, ratio), max_wh_ratio) + max_wh_ratio = max(max_wh_ratio, ratio) imgW = int(imgH * max_wh_ratio) if math.ceil(imgH * ratio) > imgW: resized_w = imgW -- Gitee From 3fcf12be852b200652083478ec1dd56fb8b8610f Mon Sep 17 00:00:00 2001 From: Harsh Nandwana <32096704+harshnandwana@users.noreply.github.com> Date: Mon, 10 Apr 2023 09:18:52 +0530 Subject: [PATCH 071/112] Update readme.md (#9550) --- deploy/paddle2onnx/readme.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deploy/paddle2onnx/readme.md b/deploy/paddle2onnx/readme.md index bac29784..7604f8dc 100644 --- a/deploy/paddle2onnx/readme.md +++ b/deploy/paddle2onnx/readme.md @@ -75,8 +75,8 @@ paddle2onnx --model_dir ./inference/en_PP-OCRv3_rec_infer \ --enable_onnx_checker True paddle2onnx --model_dir ./inference/ch_ppocr_mobile_v2.0_cls_infer \ ---model_filename ch_ppocr_mobile_v2.0_cls_infer/inference.pdmodel \ ---params_filename ch_ppocr_mobile_v2.0_cls_infer/inference.pdiparams \ +--model_filename inference.pdmodel \ +--params_filename inference.pdiparams \ --save_file ./inferencecls_onnx/model.onnx \ --opset_version 10 \ --input_shape_dict="{'x':[-1,3,-1,-1]}" \ -- Gitee From 7bae3db2eca3ac099fefb716ce4b15cfa6e6bd1c Mon Sep 17 00:00:00 2001 From: Wang Xin Date: Mon, 10 Apr 2023 11:50:37 +0800 Subject: [PATCH 072/112] Update readme.md (#9546) fix typo Co-authored-by: Double_V --- deploy/paddle2onnx/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/paddle2onnx/readme.md b/deploy/paddle2onnx/readme.md index 7604f8dc..e6e8381f 100644 --- a/deploy/paddle2onnx/readme.md +++ b/deploy/paddle2onnx/readme.md @@ -77,7 +77,7 @@ paddle2onnx --model_dir ./inference/en_PP-OCRv3_rec_infer \ paddle2onnx --model_dir ./inference/ch_ppocr_mobile_v2.0_cls_infer \ --model_filename inference.pdmodel \ 
--params_filename inference.pdiparams \ ---save_file ./inferencecls_onnx/model.onnx \ +--save_file ./inference/cls_onnx/model.onnx \ --opset_version 10 \ --input_shape_dict="{'x':[-1,3,-1,-1]}" \ --enable_onnx_checker True -- Gitee From 4b8e333f102bcee9b1a0685a3f709b72ca800810 Mon Sep 17 00:00:00 2001 From: duanyanhui <45005871+YanhuiDua@users.noreply.github.com> Date: Mon, 10 Apr 2023 16:30:54 +0800 Subject: [PATCH 073/112] [npu] Update npu api (#9686) * update npu api * add version check --- tools/program.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tools/program.py b/tools/program.py index afb8a472..a8373435 100755 --- a/tools/program.py +++ b/tools/program.py @@ -134,9 +134,18 @@ def check_device(use_gpu, use_xpu=False, use_npu=False, use_mlu=False): if use_xpu and not paddle.device.is_compiled_with_xpu(): print(err.format("use_xpu", "xpu", "xpu", "use_xpu")) sys.exit(1) - if use_npu and not paddle.device.is_compiled_with_npu(): - print(err.format("use_npu", "npu", "npu", "use_npu")) - sys.exit(1) + if use_npu: + if int(paddle.version.major) != 0 and int( + paddle.version.major) <= 2 and int( + paddle.version.minor) <= 4: + if not paddle.device.is_compiled_with_npu(): + print(err.format("use_npu", "npu", "npu", "use_npu")) + sys.exit(1) + # is_compiled_with_npu() has been updated after paddle-2.4 + else: + if not paddle.device.is_compiled_with_custom_device("npu"): + print(err.format("use_npu", "npu", "npu", "use_npu")) + sys.exit(1) if use_mlu and not paddle.device.is_compiled_with_mlu(): print(err.format("use_mlu", "mlu", "mlu", "use_mlu")) sys.exit(1) -- Gitee From 013870d9bc963f7508d5a76f81da872b84471a71 Mon Sep 17 00:00:00 2001 From: duanyanhui <45005871+YanhuiDua@users.noreply.github.com> Date: Thu, 20 Apr 2023 10:26:37 +0800 Subject: [PATCH 074/112] update npu inference api (#9764) --- tools/infer/utility.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/infer/utility.py 
b/tools/infer/utility.py index 83451e94..959373cd 100644 --- a/tools/infer/utility.py +++ b/tools/infer/utility.py @@ -246,7 +246,7 @@ def create_predictor(args, mode, logger): logger.info("Please keep your paddlepaddle-gpu >= 2.3.0!") elif args.use_npu: - config.enable_npu() + config.enable_custom_device("npu") elif args.use_xpu: config.enable_xpu(10 * 1024 * 1024) else: -- Gitee From 44316ac7fded076889e6b916b39a9b984c719c74 Mon Sep 17 00:00:00 2001 From: Bryan YW Date: Mon, 29 May 2023 18:34:47 +0800 Subject: [PATCH 075/112] Improve package import compatibility (#10052) This update is to fix package import compatibility issue, such as paddleocr & detectron2. with this updates, detectron2 can be imported along side with paddleocr. --- paddleocr.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/paddleocr.py b/paddleocr.py index af0145b4..ba707d66 100644 --- a/paddleocr.py +++ b/paddleocr.py @@ -27,9 +27,17 @@ import logging import numpy as np from pathlib import Path -tools = importlib.import_module('.', 'tools') -ppocr = importlib.import_module('.', 'ppocr') -ppstructure = importlib.import_module('.', 'ppstructure') +def _import_file(module_name, file_path, make_importable=False): + spec = importlib.util.spec_from_file_location(module_name, file_path) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + if make_importable: + sys.modules[module_name] = module + return module + +tools = _import_file('tools', os.path.join(__dir__, 'tools/__init__.py'), make_importable=True) +ppocr = importlib.import_module('ppocr', 'paddleocr') +ppstructure = importlib.import_module('ppstructure', 'paddleocr') from tools.infer import predict_system from ppocr.utils.logging import get_logger -- Gitee From 991fb7c46a878de17659940e6440df08fff5e711 Mon Sep 17 00:00:00 2001 From: xiaoting <31891223+tink2123@users.noreply.github.com> Date: Mon, 12 Jun 2023 16:09:53 +0800 Subject: [PATCH 076/112] update wechat code (#10144) 
--- doc/joinus.PNG | Bin 102492 -> 18395 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/doc/joinus.PNG b/doc/joinus.PNG index 6489247e05d70896e2ca8a5929948437c6c82b5f..aef92ec8d1f61f6fa8766e8e7a9e56993a3e6f13 100644 GIT binary patch literal 18395 zcmd_ScQl+))HkYzNQ6WX5+zLuhNvNG5+TtNgeZgPU35k-Awq;GqnAX99z-`t(?)L- zJ=!ov9WBam5959BTKA7z-tWEN`qujXuz1GyoU?!Dx63(aZ(+|=6{xA0sfdV(sGlf4 zQYRuJ-aP)IJOe&K4o$uT--w;n6&?^3cU{APheu{FpO~wt5ZwVkQxXw}S`m>Qi-0d? z@I^#Knn6rN3ceE`f0jY=AHOBu%pmNB9bL~@<>k8gLrxTTmtlk&(8YM z7=L10$Ls^a8ac|Ss3=(?^5?PE*Ixu%+W8cnue6>V(xrXY^N%w6V}6l`+(RC%?1tr8 zOux3S;mvrCJ7?*m*&mR#Q9pp(N_-+%(}{QL>ex5*N{ByO?l)8+oNqh`4R zrJL?5*jUbQu^LF68hnM2nCu4Mvn!aPehL<*YwZKpJU~ zBIAi5EqjkA&B^ENQKTefunXsJK#yNQE9);iFLf->?k{_Rl6FB{;H?WW;q4@;J13tL zk%+U)X3RIS_mLCE0fUhK?>Y$kS6O~8E?VCo4zE#Acby|1D@#jC+lZp8MdZ*4IAp6f z7TVzLd24jJZ=NeM#?BAA_jGs(pX$2nWwk${$jA6HBUdzVpMW0wUk=%fy4~l@CwK=u z1Mp{FIl~-qg1Ub)QNRjX!HNL+gX^*xp~hD%$&MF_h{T?RtTve~`{KzGQobPyV14&0 z<09d$)n{oJ?0sV&pR6wXWw6*>ApwsGYyUqm;BWii<%Y2J>EBLUgW492({G2W&5#Vk zwXOxOSRNjpBBVMkFfW%y0+%OK7yaVc?q;#;C4P7P%Ml_% zKSRydBjLVcKy&lsyE41Nc4O6dqgJ}oWhOB}$#$RX>gsNPk)}C+{*UNrom;)@E2crcbp)N+23|}0G@pTnK}en z2Db}Zk$*NcrS@m7U{!sf>|ZJB?^$f$cL6Epq&NKR-_0qzkVVsaFLkA=Z0=a zIKjLW$tgFZE}ZXta3X7TgM@7F(YFLf!s16!(JowPxsvqeM7HS>S@p=s@oxjpQx)oz zH^VS>?yHp$rSD;9t0Q9aFlk;Zi7(%Dyl=WkjuY%TM@7}#Mi)6OYBzEVrSGhAkNF{w zfj0Xj1T);z!;a=m&lxPxzV9k#9ndwYlsnaB{YtdPA}+a|phIf}@Dxni@$U%J)B@8i zbkblV;Fbt*6PjsPCqUDh4@6(s-+4$-R|u$UuCH^4AgV*p-c#PLVn%??^s&EecLa~p zDKZ5>RI4WMJVDzIK=njNv%FK$7eMrmSR4VhC?>b@N-THQ{>^+)^77j6$!*#4=uVRo^gBFo>_jT>a)0&3?;R}NLp(OR3|8Rsl{7a- zkAG94X5+Q4E9csAm-Hb4Npm@A&!L*5K94y$){N%boXX| zctJiud`?tty$a^!5Rw#mLW0>cfCBQf(&Pz#xMM|=Gjl$lC{x1=TqSOyPwaTU0rddA z4f8J;u49B`A?IipY9wnnsgE^dcOawOtY@;8IDtJ13Gn&zW-h${7H1`#O40Vt%rSY}HQExQFo&n8))vMmdM z_XHaMo*)_zP~er*pgv{WWx%#PWZ5a(J_6C(micl7w#5L5I9zo^PuX?}u#HSbk-Ec;JRvQKb1@I=p8%>kQh_B|RnVw7w$JX*# zK*!R9OdoRA8d7R?pZrCMomb3UpmkNaYuWN}!+YaoXv9A+%qRGGnXOZIu~@|^O@VPs 
z-7Mt2Oh9J|&~$h!NAwgO;7}}r;7YS`4SQ=mvmXf@xeOsLXV1i$B^QoT5_72((tZU|V2$XR6k!G! z+P7nK8vVUi@ppX#YMkdI{m|DE%NrV`qtdDeM*I3MDJDaCTruS~ zjSE?I8oV?C8RYuKaV##QcGwToGIJTtZNJ+qIw7v34PsnyHWI+94~{wTlNpLbfcyAI z)Gu{BrGNvFwr!ZCU~V7JK{gVAATHhHLURI;3_$PTiUlm}7|f^{pk^eEz&OEM(Ez<< zniCBO-l`e^GDE7obHXqjK;XY)I8XQ80S6zf9u8@E*av%g_3G6kq^Pj)OE`5|hF$t< z!@m7+oqLIEza6%sSYVpmZqg8NE#`MPqu$nGQvj);Mc1{0GC78_il++6(8b8<#xSmJ z)SQEMlu4{RTn$=o{=U!vF|s$Ajm)Nj^4D38TebvN`iLGyp^Ee@pmlbLA$ks~+6awj zb_NaJHMZlmgB4gE{?+L@K~)HgtJTIkdLGl6=J!K`bpv+LJQF#%BUOGQ#IG*Ux}1qa zwB~+;4tFGhhF1WP0udy31Wq(P_m_1i%eqHc^;cj4ehN^1JI#m{h)KpTMokmA6f6gF zh@V_NX9(=MMo#IXAp4HMP}(zK1lKb%GrbN!WR-#mx8~@fmisKQPg%~@e)Bv!#H&`+4Wv38ah(5V zmkx#OF9|mlzu8iE@Dv$$pY$QMHpkBd&Z(Cu>A*$1Q^fm6+P@9E>DIZ~v2Tr3im}Y; zH~;x9!D3eh4N*$B!wS!V_`qi~y25_sHe5Meq5o`1Y}_Sg%j;J~H)L*jws`dl2LX~# zfXoU$WL7c+Fz?7_Y>1v(9t8mX+LcGOC(#4!RJy!Q%$_LV8bI9Hh!#Ua_!}~b)e|Ep zCUcSn#0`+kI!8dCcA=i))MXL_tSQb!LkM;Y24VgGI0!jf-R`-`%TnVvwqw=3wZoNA zgGOIOp?*zY?Swb4rOmSSi)YXN)>0PW7N4fPh~gKX6EsvA0h}U4bZeR`V@^wtK+qZ6=pJB8Zj&s4Y@cs7`%gBQSYijrdd1 zAs|{9qj8-8DTa}~r{AId(rFZ_1){D_cODVkE*|)iJr$iB1W_GEt}fX7Fe)e{d0_;d zvFKJ)n5G2G)wR^#(vlYL21wObW|8hwsqeL1;e_8pA>*_SxP>gbI!s+>bhkIUWn_wX ze~88)z53@6^qL5{y}C{DRyT?l?w1iH=QcCWguyO;rLUfT$1{6>ayOC}T07u&G4|Mp z334=X>_qgI{nfw7#m^64_1&q%ZT73tp=$)cY?20CyKF=)ML`h<$yUoKE>^9y8)bOc zxro8iD?K0z&_Dhqqe7f?wtU8?PY(=WnA<-j`I30az-##cOu7kk3R*_XBNgg~hHS&z z{jbSrTltAaYvxfN{iBXl37=6id%Yhbo$aIPIHz&5{ZQPmm{eTsM5X)Yz8S?C#%L$^ zwYC8-ZlSC(mDHJz{jkJlkrM7aA#>A-Fo@oKx>UhSi2lnvsFH+R4)yg9a#Xnmm0p3Q zp8|Lf;t!u{c#JLtUhN)mxyNC+k9OQ$H({AcO!d4(LP7KU*TI3UM($3>{gUKc=`|+r zI#Rx;-JRL2v>{r(xMwy~s(OQ5|w1nvfOPmJDZjt2(`9Dq7% zV-c6cA+LW@i`CoHbn4E++3p?IZmfMm_?L?4$t2Lq8X1GP{i;v6PeV4bnIrao1Zh5Q zVLx;uQIN-|(l=p-?pWpk$owlx5K<${QA?M$Ry#Vx7AAgp7lUT(_#UE6$u0oM?sK5k zkvLDn*D`HG>eWsrXaM3jcdao-==jG-<}{a_xGH{Tfsdz`G_?D@^i ze8zLst?qD{YT7|oR8pbfb6#E>X0_WANxiWVKn_cIZh4jD=2_dx;ro64l5ja<=OHQJ zx~`;yJ)JZs`a#qEa;Edc^!I$I#+S5|>?j|BB=f`U*IrA4&2<5<-o5$vA_-;i6Qj+C 
z%s&R6js(hb0nhgAJn8$>aH0aj$;M}4f<0vd=l1aKd0`S_vd5QAdp?lSRETst&Esy+{*iO! zw)+bqp*&Bg2o7!=RA2?lrC85YW_rYyi$Sm~Pfx;4v3Bn_V#J@65z z#^=y@bkoH1$96#)y(R72|D_Wh=xIp*y`TZ|LQ`-u&`rs6PTt6y#J74c(`F@k>Ah8g zyX#ha1QXi#LkO(#G6TJ$U*j2f;|eLJ_BK7bA@tUb)RKY%!}-Oy@~%smg{8syPFyE* z?NMgaH(nik@1kpVkc~F>T@&=(okWlabAeUm@?3nM(iFuvZe{7`)Hy=)`^OJre(&&b zimdui>TXNCAAbZ|=kfB#zx0-Ur}D+dGU>Ib%_4^^ejldxv%b1o*4T$s&`LdevzfE4 z3m$H&{^HGL4Z(}d}AK0d? zfWxM`>q@PR0m8Rx8_iSBBjoOthuDdJU}OmUwJ(3+nst0Zu_RYfk=NYc=;~Mo#-|SH zA!Se2ggDB1p`@sx^Ip!6#8Q1WISr<%66xBFo=b41Nt}W%r2D#m(=*}=@2=kYVL~Rz9(5i!AgMx) zYG`nV$tq;b&B$y z$`@Gfz~$%JqAb#07I&+D)F|RtTJDH(`u>TjZ(<_HX34BJr$mqBjPRHmMNwChU}EK- z9?@ouzu}t(Yw6bs-ACr-4TgRl-bB49M9ZG0WT{nShOF4GjY4N}ko36mGcB!*4a+AQ z9lUA|oj&8Lf2FQ`6Ne^Brza>g9eWozk;gR7w#+J^zCgiqN7qP$lhV%AkTwM`}3N>9_iMfAa5OFu} zwe4VOY2QK(8U0PKt%Nm^{Z*4%*S`bYJ`G|yua?C+2Rw16AF9m0->R#b9KP7Ea`fhs z+BJ%2&;I1PHX|z2B(m{?qZ+^G+>%ArIHTus+VF_$%B0wdm5i*96b-(MDvog zN^C|EwG74kEw24At0&6&1)a=gD;B5^bIUC9%wM4D0yU4)_Bt=_799CR#t)JspAXlS z80K{IMh#U~7xTd}hySXTlb>_r??Oq{ln+BMlBVJY@@00!_w!l;t)uooY(9cJNujOylgl%!*aZVdk z?2ayo2MhBTmmA%M{j2g<-qb=IewEDLzS_zMLKrv{`S3*e-cL1O99_-+N*zV|+g8LJ zvJYll}$XebmX*Hz;HH6uIXb<5B8M+cWyIv>w-Y7Be7X z4?P-UE4P9kT|WP7tHH`9Q&Y7hzI^`0B1K4b!ZLZwht0OUn+n-kwVLLg`Mp8gkJ9`O zqkOrzqKBPx6p>-7;_DBRFI#^#{F?Q64|XPNPO)x46-9ILHhnF5X6w17N;iB|#{OyFbsa&Sw0|lEzD7OEvreVFIukspQixhi zy`fkrBXR|rJKM`f-=;TtSHZwD`-RwVcw6+%yLovSc$a8&_PK(&m6G~9hSf?BeH=5F zMxnRn`F&N~q;}3g4Uz}Ct$Nchc&=c=vk~)OHBrlPR{a(j`vB(M61Mb@*ds*?9+--6 zwQqj?{0_V|R0e0V`};^ae!5H=FZ=vXzeOC@k;Jvn@MWP5ZbE@&xh{U4WPA)p)4}2} zxv$iiwmROuV`wu}7!8hoDSilyG=?dQH6PSDCfbfJ|cg}ZNl0Tg_5`A(aKx)sz)$bxjn9g zH!eT_PIQ`^j%#vaiSm#P?mou8Bp&MZbPnD)a^1nVUtZTD^Z-NQ_o-y^6U8LP{BWWr zdwF2>$)M@DOGdwpjCYXx{eMc0H)U3zBx?GknY-t1w(w+q`eDmr1QK&bkeJJVD5JbZ zR$t+`S~q3>{szd}`BiG!qe=XyCwQT4{FRQwPzh3g{`YiB@qkE$~;{OV? 
z#EKe~;v3d;`<#}%^Lp{v)Ksamzn_M2L)Z(Lp+Kp=u`x(L*1WYOZ8z0^#4w-Or?h_U zhyCuZntAIFq|I!$1(hjUCv^6vAFQbA&xazI3hW*K5X~7gTzrAsB1L%l>_b1N0N>xL z``P^t?yNB)=>=Oq4z5nZUr`0?AEP&QX?9N zRGw}J51L;1e;$L9bEof+9CB2ezTH=_PhJgA4jSx89HrVq&ZWZ8Re7$1ec$G@4m!s< zzU6w@%Zz%+My`~6l!yPG<1t*0xea%S--x%1m+?5F+pG0E_`xaTlh+|tB`>C-HZ&jE z4qq7*8U3~Sn_t)}0fqeGtYo^5`{^{**I8zSp~LN#Nai{x{6+bUck{-ewRfyu$1g%i zrJh1vWblWHWj-iL%z`eLnr+hO{dNo3D|>V!CTDqXKWhnwc`=@Hm~ycF<_+3v_;3{8 zH{jjSkp#_Rdj`w$UVJ0yxhN_=;r6sE*)K8?aI7Gf&2}-~9ly>$i!~hQ88wuI_-z&& zBH4<@ZXW&XWSNlUm-Kq(&Jw%m>$4uY#?N}CFf6W+Ro=c23R$_dFTSaS5a z;Uvr(7^S<-sK2j+WRA^m$8lxWep+FQ?_1cBufv7jFSRD6IH!jA*sAlMxEFT!s0<*O z?Je34@y^-!0r9da3ZdB_bB)r=`!!xU8125u_GI4@4F>*gF|%|D&u53rlT=#^IB6^C zk()lt<+HLeAud>wn$==^oqaWqgwEPS7thgm39OG8`-6t5w)|^-6V(~v#`vehS$E3K zeH0s9470^nZ+@u1B~;-_O6u_OunU>B^>52*D8h43w~p6$V|BCQsSh8=?cZj5-ufdj z2nzr2s@ZZC+eSNFZ6%H|O+wqPUQ%zn0SDPdg-&vOINZ67i?4ED;Mk+_*zlfqR#6xA z%(=Y1qZsYm$~ACvhN8;aqkQcgF&Qtol|Co@ZZ@~6$F=qQ&nH{4g|M$l?v;hg|0R!E< zAK$e@bi8#^m;MzEU6GmY^cvFKK;RfW{}!0r>Fn5GH}hVUJY7yO$r(M2x(+?Bfr zcJ96}v5iZY&bhydXq!AZ`qkg9wDpazHX%H#P*aVeUoS%;f2Z}(j!)fSJ2++OkhErt zA20YK?ocaOs!=M3&86W>3-z^ltae9?;lY7~L)gybk&>seNj7haHkZOCshZt;R{RPc z{^g^{)-AFsO68aS2yHr!&$1I#=byM71#>r(nwm7 z9*Qab=DHYWw9OV3+EDpdcrqP7coyMTc};xm_Biv$+!2b+I<(JTK!)b_$~|_62CXAc zhD4#}FxwH+$d?XqF^HHA(%d?F)u-;6wRt@9Flc;le8pHCRxc~z!c?>@-#7A2p<$1F z+uB9N(P!dDFLVOtKjyC)ORv-)jjzqI7sVT{46dyn?NuL?Bu^GAbh@{Fb5?EZ zl|m`$Es^9wwrOhnHRfCG+z%c%?8;^Sd_K>X23bW6$*e&pP&d@< zFX?z#$xSOe%703~@6ykV$581@bmDmYr$gdsG>g{GUFZ(NTF&T8ZU47ZOtEsMwuq~ z?bq=mQ4JG!>I@Pg=O*t#yM-pN!w^iEt%jWyzx}a+)zBKp+k;XxzSjlhz;QyLQAea~ zzW?siq#AQiKfm;Eu9|Ia4@c)@SZf|ztc!)`c45cG4C{yR%ZvuN;QAdO=UL>=ZsvX4 zhFDJN9;5AS##W2fdt&Rhjo!c!!Hh=h%ntQsBsI+0oWwX74ZcC!ZRQt3-$<{LKIUcU zHyRSz`#J8kL9v-3CH2E#HsN5cO@hf%?00gO?^8tmrg{voPT3Px;ZBsurh1|}G)Irb z7&bB@E!Ep0zE@Fx#FGm9bS3)so(8AJJU;iDUy=62zmNp|%0lRb^Nfx8D~Uut@>-LI zPf$1M{w#{V!PZzzg}UeyH4YP1ISbtAZ1oXxj47plo`TH@+?zZGXP*jHQ}krbw|gec z8w?FH6=JH|Cw)5$Dl2@BR%-j9ha1raEiVtB?Ec}qKR1c`JzKt$=rcbt 
z_19@~-j}bxJ5ls;0v3I^!Xdu;W8Um&;*HZ`*$tnulH5qXA)%Hn&-srI;@*!Ar+mxx zqwBO+qXJ_3vsO}`z%s_%#$9AoXd3kLlYe68#>FK0W%d(WzpprrHyYj)npchPYunHI zyo)y&?)6Qbh?TZIGyYLv!zvLftp;nrb@^!fN1wwCR>~lCy=X_* zDaKXqmHvk${`hQ%9vZ53gYm}6Tt+AEKGNw;MQ?omNRw=$FU`SQ?>VHoO<-%|;l{?L z)dY@t2qX&q(=ji-Sla~qC8jTbvVDGqFRpviN_98irgv{Y$nTy!DZX0yj!u5&!KcgG zLr)4d_OIWc?YHTQ^V@}M&;4XexnWqpGmc|qJAAVez6!tUkN{cr+@X{Buu{N3T0k*G z-xjFU75!t{K99F&wK=(@2ot??oa7MdlO#&WH~k^dWL00r@sxR=iEB`$p@IY!@tpa` zB|L+&GS?^f8qTUZGu(^z+uP<5?12_AYV-#E89BFQW61osHe`|SXGexjF}m;bZ`eip za&AM@D(|B@bGC}aX(tqCWP60C^A^@*FOGkF3;JnZ0acLV0ZQPaEsJ$I$PuvD`gLjd z5&9fzLq+6wr6}rItqirbZ*@{AZGB=)EnE75tk3pcn5mtq_o!jjbXS&P*Ti1jAyqfX zqu3p6kY7PygAZ+EaEq)`MK&KQ%e5fYre!X_r=1RQ@_XZZm$(yg`PcsV1Zl<&wTZ)? z!+pHAg4ZPI=~dtqhbttW$ie=(Zu_K?H5o7<$Pu*dE6u;lycS-$~}Y)$BGYaO3Y`Qkvna^JzK!B`zJj~L|>?)&Cu_# zdLYfY(#6%?Q(HIuMG`VE33br9j6UxSm2%8W)4ZjyWo<3i(5U(D=|RN?V&5GZKN;Kp z5xMET7CP!mU8Vd^q+`wEpzmIfe zWVS#3>B{_SOqId#&HFTmT4KLjvZ3YU-nsZ-gD|_1&E+zWwp$4O0)ske`YQj>MiUk_ z4clr0$7(U8UOUKlRXc5aN4W}f&Z)`uHJ3E4+4FeT}o} zzKx^js+S-4Xd3rNo%s%27lTCuu|UF%0gW>MF+LAK;anqrdz3E;%Q#$W66&f04+J;D z&fKcQ*XRXoFdVTRNsjVwR-#uT+G4r0%kaahyGw|@V0Wl<^pS3(nsKN_TcOyd)t{EF zM}lbfs_(0>lFbrqvT44#D(c9`xvbpdnEdA@(?LDnBE6n}Xuy@7G>yKLrD2WNDEn2B zvD+<)v!_cg>0}!32qmYBK_l(a>e)WEB|frh3UwQU7krwPL6(KOdQdm>58sXqojo`= zV%mEfW}LMBJ!o%x*;l6+=B&))8JaddBvXq|soAT3T{F-*e!XQUh11!lJKjPpb$L)Q z56viu9dmX?1=`E^*nES>D@%4j4?`r^nn-6BmRrlK2ll}=*6}r&I4-coW4l*@jm6}G z0qY&3`&tm~jcX$zw$c}taCvf2hHi4y7B0`6FVf1J5Q$L@b=US=%+P>slDpx;#gt@Y| z(#H7Dui{;?-kjX`srvmzoZ)#O&We|n}&|95s$N`Ro@v2=Vu+jcVs(# zH-j+^-XyKn(F@WSd&8-ee4F9&Wz2UM*clOow5LKpaSirDxto0?Ib#jhnPPKC8Bd#G z5z~Kjpr>q9Gco0~uJiC0;k_`m1(ktSE=y@`DUFXOF&0}%6iAuNaCG9rH zFpX>)3D8B7*NS>SP742)_O4A+>c>F^{tYhDJ?!A{K14ZI`u60(WBsm%FY1vj{Uu45 zvj!b7#ExUrt?Dyt{%0Hm&Kj?lHX8BwZ_WMU)lC=x;DU^y$h+^Xf|K(fkd!N;h$Osiv-B@SHmaoG8oGvleOJ(=o1<`n zP96-;die9O&kWWCopg~J%6QZmk6K2}vs|9{i&u1sze+~?6xDCl5j8?S}dYi zhF-u}x4XKeYhJ%9^7Y$)8?wUf>8Mv{;83sJRnz00YA2h&$zz)I!vgYzje!9(Qfz-g 
z?3JOPR`$WceBGR>O?2VW)ZOtxDZPA=N2A8K)y+lKutx^YAFtc9Vvfx79wu9*ZE#8@ zsL(_T?JtLmXeM9STI5(xNE^6a8ZY_nuhCt8N0q$+II0%>8R($BBTXh>mH z-wmmRqA!Zm%m-~18>U%_Z(h8>jFcE0Wcg}EA6Wi0wL0*vsv3KMNExXKe=2;-YwM9f zt)kS_80P&}n8?N~b8Xc*eWh%)aoHQ+k^LzvX<_9-pV9ThzF(n4sXP^TCrJDAzt5{A zyRsN!#jftDyoNumsH|zKJID*E@q~`jg=3_TSfyS?h~_+M^hK4loQ+-RUFxrBNEot! z7@OSLC=hDwoMn6XcT#OP)mwayd*z1zf8LZf1(PekPLv&t(?Dcd1*G7&@2vWk802gY zv`LSL7Udcais(6;3;S(UhUunWN@VI^eaE1s>ue?Zv8+@+X`RKO!dc5-klxHauJx(e}$ z3B5KLOO(X6FbsaF%HjBV#Qwwx%=j+1ST18K z?wb2W+5f!HH5wy1L{EPg>G<)WJpN9ZjW#?juhVt{@-a_ivq^|9gQNFSY2sM@(wle= z!|0HjD$#=PK1&I#2X5_0)``Q0P;IOOT6819UrYmMX+xL7aScH_qO1th8^jx_K}OH1 zX*BbAqDxTcI;Tpg{mftIp+y?~4k_YeVf9ebJTv_Mlc4WrS+P^#;#FA@lsD5d3f&aY zZwmf+cFXi_Vhh@0L>Qk|19vc288p;W>55@Cn&7Q{Nry&zI_u?d<|Jm$d#J^Kz*3Nw zEBmo-d>gVrZ~yT2NIq~fKa<+uVTl$xw?>u7+-b^sKJ{Y>wU6}fA3a%Iz(fw@rjDp* zvbTzZ%D9E{L$>tw8#Kqc0mz+XKU0!CBtNJZ%(OSS+jv0BHfE4|UX1$B#WG%3yHP_o z>s?cwjdmkXhr6``8`odGj)qO{znazqdzJau&)nWVWs`EOpCiLIy)ex#+;?C&M8cvL|L7@nN4%Z=!nM z4H{1QWq4*~Qc9PnRPyJuh2z!b!A%(lr;*@l9U--eB;9%=)V!pg zgAMG)QYZ22w%z{86xn#o+HZKV4br^(baBNmOCNlK6)h`!L{xNS9Qr2ET*3|&T`lmu z*}$rqtYUJH%l_B(sVAU^VC|=Ou&4*IbhWy@=x?v&na)Dk;WB^7o=h^~>s0c9+KO%@ z?6uf_D`G!PPbhq+Hq7Vf$W}C_7Tn7}A}P!awG^;?dPtQZf}Q;?d}!xHEp@xel~v}C z28r~#{$YEsGa0QSC<}ga*8U>p1KF6d(G>fk>d+w>m-l+}PeKsf!nTK*bDj}IB%ZfH z)#u>r{^6dpw;qmq%U~ngejW5Yq4DY8trrjNl6KKzz3DGI0ZzL-#b#apXtCd9N_zQF zY`?iXA%wi%Q=}%uw>d@fxZ*^Tm?Tw~Z8gQ>s}3*dOSu7QLJi%J z_7|23Cn@G>VeYUn5V2bz(fBb|7?vDb7;WL`pGOs7dG2cn>+<^Nt>>3|B2aTsAAMt(^HOr`KS=_74ph?UkvV?oOToKt-%7&a?;XpOeS8qYVq%U>YM#|{FXxakjraEUz`uL)+3o(j z)gtPvq7ftM`aa*yQuLMekKAeoSTu*5NTj}8(BLPjBylL!c_?NxHuI&YuIF7B{GMG$ zb$f(KX>|G=HGJ5PKFqrXLqx&=s(pd4Q}m@87^H8bUcKh?-tiWFx!W`wkyu|rP{G+~F!w+hQuenN*s zKlW=b7oWJ?R)ta8QH@Eo?mV_Ea6Z=J@H>UlgJiY7@~O1XKDI$wT6_PDsyb;ZqqL?933Ml6qq)Tm2ILEu!&A)OXflUEkY&Nm;bzN zz7}aNmOQrAg4~V$>W46!-LpX({ZwiGDA2u?nd8TFT;ms1%`34(-r+k@ax|_34vS`R z9pzA53^hxck44Ef$;($It;hAB^OaUfZYjDs^m{o`k9!G7mzdV9cSucnc0Oq7jTbT( 
zpZUI~mAdXU9UzWP6Mz0;*S>LAB4hUqF(^s!(xo*O6;6Ec`pxp9L_0hdRpQQUG795{LjwRc>&CNAI})7qK&{g9dbaz? z(4OG-YQ9ajU*))xEnp0k-aC7FV_1%>)MV2@2=Nr`aXC?dy$aNhj?%~}Y)5VHxN>9r7Uq!f`|o3v#^tt#^#g_9 z#T22EL~K%;|5O^7EEzAWJ4DS~{q0sB!a93Dr1rFEHU=obp~!IB!x9BljoKSHZBt1C ziz+Nvc7;&+7YH^JK?Aw=gr`2x-;#UwzbsZC4YHTxa^qWiZl}-3`IJ=817(_3UfpJJh6Ao?puaZF9LwHnkBY$i}m97__CW zqTX&aC?8dcD4t77O44V%Sc5CfRIqQg6d=6wCz$9IhvX?1vq1Y+Y9?wzca0HfD3Bft zwgA)tTD?sH`qo_k{GBlG^T30ICa;}NEt-zK=cbsc@D+mkQ=ruGHC@`NPEJ6J*^r1c z1f&G?!8x}540pVc$}ix3UG_QD##2XCy;J>4T4$q+3r*L z#RHlk>CEEX%#yDHMjaZVl@62AQ%-FpS4C|VCsNmzL1O@iK8H?I_Qb4UV0Fdv#>#k| z+4n$lEtLsyEcHU1>Uxw_0Jfyjz_>Kwg$UXQVcB~FbSfO;xq570<@HH0B!Ml#%Yh!OQ5!v?PWsyAiF+5 zuev#Ch@iF+c#U4^b4mivuYu$gGmS1``TXO+rttTFX~5&kTM~+c;K{$mbt`2s6T$%O zbtL-IBA}s(fjB_KX6R8yoN%u^`#Y0^AhrWpeTcSKx71E`6uCNxv5{;ttcuA&g~DHv zxwQs?0*c?f^Y^>%ug31LHp)0byIOX%Y2kLZwzjd!4h_S5_=}G~CK%-H_h)-Jcyw{F zfV9+Yy=>W`XU6p0a-ZR-M9dt?=xmxTQ5AtE?0-S=a^2N3G6I++fPAEEB+P`a0{=SD zL2KW7=@d-BFB)%3%M;H0XHny6rvRXk)Y|n7V!~Ui0oztWcnM7(0ZqUZnnL~05P<%_ zA5eA{(pBcMZd|jsSTOFD#;NoN^jsY`r_@%(#l;Poai3J=M@a>=lGmQy2xWzFc|EvT zVl|lmLN%blcDU^AxgKwz3cC#14aSAtySz2M$QCv~{gKhpqWjJ>@5OZCoh^aD$5$x2XLQ`%abWW3908yIysA5S4JByOU_HG1=?#H&gxbMh z=Lt{2hJdX{S9(u7L7G9pX}M*5=G13^zK~GA2+Pxk4ikW$$)&rbgcX?rHtNeziZm72}_Z_CJFw2K2Ljc5L{Nnilspt5^}sRe?H z^Pp$Bs)VOL(D0Yb6#JXdF-aQ?6#nnXnm}X};tD_Qd!z&qQ@Spm_OEn;)ku=lxkkW3 z4lq#eiv}m*$qJ-7{pZ`KPyNAzjC*gN<_XD=F*7rpg3jOommz)F-M4Mo6JHEgdO;L5 zxg4wg4DVX1d)f?Sc5%oI+RV*m18;=NRSch}wOJT)(4ZnLJS{7jhop&|65;J%!B|BY zuG0n_4-h}sBFOm&f$u->icXe#3N&^86k_-Wx>Jt-)CSa|B}E3FvOoc_p!;RdB?6p< zpvBBa;=eK=J3oI{nN>fB%~y6Rh(Z0mx7LLGl>Gt8X%lR4$lIJ#3RlV+oZ0)s+i}q~ zU~g|vwzg+r;Ibk@PHWMaMV=pin0C+c!Q$d#VMqZ20nN|PFGT9nN4(A?KgXbGVUhcS zMm0^+>IG!s0U504X4KCuF;UTXyJwA^YR)**E>o9vQLuW*c6{hGZQemAJ5+KrTQb3I zwCWnFK<`x1zWoEKx7>>SqlZZX?xk*+a=TA0p+>Zpr!8m2AO!!v9WeSnd_SH~&lEWD z>lxvX=|mkSmIiw4Ml0L)Cw8QB&7PekKI|JL(dRac^s3(NzG{z@wXv~ryYq?RWN^;o zNTA`s9U&V+4tOZ2|InU~wZt;Q;&KaSAWuWJy3*&uB2`RIIXNt>nmE@$kHHN=mT6XT 
zIXR;#)qR3{XVUM8OlfX)39^dyKjc0m@$&hmlU4cmN|t#LnOi{wEownl8fauHr9&4h zRtgz}QAO@G-uZSQF-CoZ2Tj{5ymvY!8#oB0={4%POhCmJkWEj#{nT`C0CqnLlG8Y4 zH|Q?5o8vm!6Zp%%1?;A!KBIEVZY^+{7x($p0QkjS-B^Bled+!M%sGeqF0Mf8&LF*k7WLrO{M;)=WhQC4|w4B0{0vJ1W|As{9hfS NC-SO~iXRvS{vQ>ft~>w$ literal 102492 zcmb?@1z1#F*XW_UySt)Kb3X;Ds zz!C%C0?!2ic2AuhRi(wKwX}7pf7L;Xk%^N%1pb$AaK9&?e)SFjZ;=4dosZ=v&CPq@n5i;v%NF;p59;g zo7g-4g6F`nke#i|ud#lWUo%EGd-_-n{H+E4CkLDWRX`dL1H=Dc%Pl^iWdQ*HcL0Ek zx~(%#0)Th^06;u*TSuP>09bwiP&aIEL6;3;4NaDX8Yzy)vvd_QLaaR3n>0TBTn5fK3q z2?-Gy6&n>51qBr!6AK-i2%ngk2%nIUl!AeZl#Gs?kdT^HUI}1ZWH_mLkqy-z`)_a{OklMz~qC4h5Na`@0XCSW;b38Lu@K=AVPFy95C9lhIPh|iaFB8F@QG=-?>*LVa(0Q0 ztE{RX9U~yR%gZOKqN=9u5FHagPDl&lF><7PnN>qCCjRzAyD@`_grwQ44pYCSgV)*n zjNk#tz$5>2cmNI_76B0k34HM$4)`JrEIceCG9uic8U7j_2LYFcTO}G1U(})U10F4p zs?p20ql9!IF|~dAr2~eaGXOgHZde>R96%T-@&y1;Uh{F-bYeeu2n6CH&II6Sg&VV6 zB^}=Mkl>y2 zW~8Wr9|p^uMh;lf$E}_E?aMHS;M=k;($S0_I+);_NuCN3wtFvkGXqV;WLpxuTg`p8 zDhKXd_r`+f$Gu{vP{2%gQxH@-5*wI@r%Aedi)qJ=2IlcCcEVOnA4kE&eDTVd)?L=> zCSgB)e-+=t;M)xVg!f>TF)c1q^|mE@@K25^vZUYY{{kJ~I?7sFkX^(U<^0ZeK%=|)(5e^m;cIz2U4Z(1dK{lI5wq=twWhHK; zJa4o~t@J`oOdc@!Usmcl}xRSwZ34z>MbNWey8vSzfz&73Dg>o9v#r!JW{ zVyD~bE2TPMYu$dFk>mEDg{DlUvKVdbdGy@LnXgwSv^~B7+Bg0$Gi(mTex1|)C#lcA z5%PzL8N%)u>gFnRxKgfbC^R#1bfSRcdfKlR_-Cq}d-!oL8Nn>IUg{fie z_9^ywwd-ikSqZ3hy^*{=YS;xmmuu7hjc^#9iiLfhq}eh@gm{8tj6HmdBX*If*^DuH!vaKtcw9Uf6lP^>lPU zO$Y>{26OMP0&1iq`x-5{-5_4Y-SQrXMjJ;T6wB&M z%eG7#C@pfeL!Pg?=q?}tSnoJVUR%4~b(FEasBlb^f4R81N##!)m*7_hv|crUZ`em1 z6+~j0sHen?@P#(BA+?xip3+$#<3=2brHP|1Qz(LA@JQ>zd^OC|bSeH7(iKc`Tucnk!wYu;)@Qc(Nrq&hY2bZ*ZX(pp9+=H8D*)Bo_4sczi z?tP|pn{@W=B6&2n+BESOz2Gm8$2f+NBG_jcJ)V}zYY#3o%Lxu%uRQ0% z3;K=dUmvL1ft=1N8C5*t(HfG)9iB9s?#cX9F5TfWq1~M-c^Ba+<&87O<+r0iq&9M) zxBJJc(vPxeQ+rOc_c`kYJ1Lf7y-QP_11L<~oT__`q!;D&&26mI7+J2EYK8Vf6cjL1 z_my(n_MM%9LSkHZC*ir}4OI^QF8@T45A{#QL_4^a5?BiJVq!su`jS7;GXWU*rDy^i z6tl5Y8UvR=L5@)qzG~U|=m%*YK$@@hQFW}Hdc+di7Uqy*t*W3OXAa^>n<_c(I`?Qo zE9RV@srrnJL2wE(J2(i$cI)b-_*kFS++{&RBPE@kB_(0V 
z_Xxd?2L|XX$&jpoUq;hwQd{Chc0&&oaNL&;qAJf%~{SdK3M=Te-&o?Qvi`^1k)xg=dML1%TB7?aIZu++pv>cLMi>S{h zPZVXh=24By!!;2Je9d&w*&%fe}djVbbL zUI2_lMOLzO`>g5r0?W-jU3zg6ns;$AmjRa&hMbld`Vd6YKo;zqu-`HsewEdO6Qtl; zIZixojbTB8YojY=qajy3;r^9%aBp~smUER`@J5e_7>8EAd>YS#G3AwHszwRn+aWcN zSjEibH&WDL~Mx@g`yH^ zRh0@^ddJ*wiY~tn9<{0%LX`w*BjFdAg`>wQ##AwtCSDLsM5Dl=AVkJ3X$AH)lF`~m zhr9ZpdMP0OK}M<8Gk11{Z=dhO_J_;{bd+%`j&p!&`POw^_5vdO+0O z)Jo#hR&}4f(ksjkjILVK!P4A9G?tWmEKClGb57l`5x{@;jjXg0J|&+j0#-$6a_Zev zYb6Tk%WryV38Guvx0+IfE+PdE+%6#6k+hLY@tW?03VVj=Kb7$l-+xolCh~&0UT94- z0n{_NIUxV|{&*4Qa=QAZ5sy{3#2_Dt1_5|o@4pr1)g?V=qn;KlVk z1;FbLXPE}k{I|^lVdh6>Y2ABG!r!KbJjw#8mjS&;VG$G*aHqFwxHA6EUJm78gbGF) zq$ONg!K!0M^|;^wIHJ{301t+^ZEmf+=AQ%C*fek~Dk%0oYS%8E%fs0D z{%Qt~6p#=Px|*^`a#wx@*C8*L^n8bBun9KS7eJ|or{JHO;XJ@@h7DbOoliz4c}+`{ z`KuKGZ{rsc^H{07dsVK~-^BHy1FyMzrW0YRUaE#|GW@Fmzu)vW9ql46gpy^dJRBw4!*t+o!U_ z2_y87nUBEJeYlSQsZj+EHF(bwzH5o$cit5ynn*S`LWXh_3f{AP8*cyd?I#dR1%+V1 z8k6H8d^()+sl@x7P*T=}TNpA7Y^_oAVlB@*y;Zb3tQuqsXljBkLn5fY0KmrammnIw ze3-B<&(hK>X*usPUp5QB| zw>ftkq=HN^uQyhyzmHx*5_J@ZXosfwLyer%hb{d{n@@t) za8JvzQ=?XnoK{j^P}Z1!m-$p)XiUoDtSNgi!m{A8{a{b8ZP=UvhF zVWIQa%|;u(NJWvQcLKi1=@v3s%GS)3DR||kTgoO~bCM*z>AraO_-ney5t3&|VbZA_ zAL09&FcHeYZ_S!z9vg!cU(rTid)w-DcNO*)`pF-tDB(*94%gf0lm$75e5dr-z#%#$ zJt2en@It1+WE$^kAd`U$Y;E?-eLmUcnJWq`BQGrt_T2$B9fGj0|KJXHov^`0p{_=l zPmByEI+3>7Y%i>9rftjGu}@!T!dhi;p=ofB=OR2rfD;k%hl*gsGe=z*f!`!T%@0uc z;lNZ_I3dSaxsT$A(V;EYS(>F1a};)n`x?=Hqvg6!UPb#o}c7gW~J0*uJ3+?h=zYvVPJ(8+0gsKY9`0DINwPEk#_J z*S&-fosAq|3o4LLCR-ZhUzg3A;HEGAHdTLVEG*$-?z;aAZkrnwlu{3&WH@Sro5-pu zQbO3k%uP!Pc0ge;JAFnAdc5}h-lqOC@lOES#`)W6?Nw3>Lzv-T*hh&)aNlfb7&S4x zQ=h*P#XdmpgFViK$xu_TbXTHEZjV+4H%@bN&SH&VQ@NgStCVZb?ftrl?z2k#ygK?9 zoHj*N#w?1qGS~W_)jhFc;FfsrB&we1@AN16`o#{a5%%K|aGG)L#wcC%CcL7nzch0% z$be2rFO0J$^~E#pZsX8@aAB>^q?~wnQL?Lf+=7(D?gk|{QQD6;M|4RU120a9Sf~YC zp%}l#h{qz#W<+s;IkGG1Lm$4709_m2yT>M>Vqr1nn$c;~-4f5oC^-Hw2r|`>M&|~N zI5~s@ltbOhoHM1IC{)*s1e@z4E1W*Z?DsZFmH7u|gs7q0N8eUq=wcssswPiGUpAK& 
z98<#$`vm3kR??k&J$ZD2BWGhQeR$H4J>rY?;Dk2rQ)lGbHy808NtV)Js)3p9NHIX@ zbSgwm7gsp3b`lGU#|Oy3;SnlhyaK#zUQ%>)tQ-jBYa_`26F49}zLg*|C(6?+DN_W7#Te6dp{W$kjmcAAThi#X($-%6*T-$4f`TW_`{It6qt1y={1!-tcF;?F`y z>zLmI(*erh$ITjYfP3GB- z#1w`P+y@7H5=hi2H>KJ?^F>j#LYMLnv6_m&a){Wr=D+YO64yOU!N5EDde(eKLm?MV?ZMCwBq+)lncAp zWaTV%eMwff>2!>Oh5avhk1Dng4k#UG~r^ zy9ifNT_pIb>_J!i55=&Z$tw4@XCm>A?97-Ve{nqJ)5I@#Qz`Bis3zSuOB-wU zU$7I$MtTdydPf3fZIQ-O%(nJSD2}v4PlLy5EJ(O(jnWkA9C3s4_-m+9!x91>Tp?+x zk3Rk=h=r%3q;E7G1|qlX-B6FunU^IYAuR)e4lqK(&9y(hx!b)hc2r2l15migNEEqR z%F%-OHD!8#0!9&?xBCMdtUs6w(EGhXAYO!{`%~fD19tXK8$Gwt9rJ`^98-OVpxZMU|DOxJC5PX-5Vcb<1q7OM4vp6q0 zNV13&P9R#ZZDz_uLWcMs&HT?xpUG~LS8|qe;NaXl!}j==^AqSd$V3Uu+>Y&+pWfvh151Bd_pDbp-4mT6GLV+=CGj*IF{i=S zH|CDuf*l@j&vdpXK{F$l)5x|8XIcVz-}205=_&U2H%_C#ZX|`K9GabqR^HOB2LQh$ zbhm9Bv-W4sL{4zIq$Tld6rWQ+euJ%Vg?mm5bN`Rwe;IJBB* z)@95SkW0=oUqK%3Qf7I}a#OXh4LM@(h2|sA;i!GWA8@rUNG0c3FwZg(>j~yIulkp1 zy7{DM8vH)_h3qk^KFbt@+zHs&o4#AvIMFyu;NzWXc>T#c(D&U4>jF_tXGmhantZdv6J% zO2({N$IMeAc9w^(kP(Lg8@IY}k8+e#Os5<{%g@RC8TK0jBD&sCJB@OIl(aL`T;rNk zThh<*f?1uXoE*Pv@?l>oy;jj(=D~8-p(QJ?zn1O9bN=kQeU0AM4T>x&E$NYol7L4*s{2vR9o`NG`cD$39@ipd7~qi45yW0| z*D_%zlBp7E=1N$B4x=M!plkrA>-&pqZkU3ixZr_BuREF#z}o|{*B>K&WFmrO85)DT zaP()Cdv?f4AcPKk|IUYw&+IVF;YM)CkaKNkf+E{?or~dZ=8_GMMkDmQu;oltk*JSEJc@IW@hZ;;CK&R+xAeOP5V`}{XZ4!R^ zC(wrR6qu&6-p0~Cq=fw<;#!vZJ%eVE{TSG)cseGRFr~MVK>s$FCEEK<}SyATRJ;9(| z94b4~BYnJG@(me`XyDTx^w*Ck1)8RDBpP@-hfP0$p8IRU6NtV4&eXO+@K~=H#P1hu zp3g~KTakf$>JRPELOhg6)uMBD>3D`%2RrwHJ4)nF&T~$hq*-8RF=O85Z2gG-q*p*+ zbsjTKCvQR6{-l#`wBc~5Rd5bYtL@*B+LqC4u&ay{!Cm!ma)PmjUVkv>Z`6ER9C+5O zLdK()h}aF)7w`!FlkC)aEPZe_#|cl^bq{Rh^D)rd#oV!*rp z>yg#srEZAhm<4g?s|MB9_E*Y3K6)VQwf!Sxa1Z@4scn2b!RNS-X}C?TaOrd~(jrbb zY=nNJMj3W+Yqk@YCpQ^9b44_E7YM`XP-bNAL%82s+2H-r!CzYJI?tQVV zgBulEk*1Q@VQs=%P8&GN`V%-y@U_`T;=55^187G&3$Y_`3EK0lQ%}|>xRH!FeXN7D z!2->Z4i@O+$sg!?5DhMUmubD&#DKoz#;UCa_*mQb_O-TE57{wXt6c%c!1_&x5P5KO zb;-lvZis5ymKyA?vHiZr(UKQqI^UnvfipKF^Aq5G2x42RT7>yL@f=H$uej+oh40rQ 
zv6i%Cq6C6@tSLjV=PBi8v>Qagv_orvCGd0htzp$PfqI90UBSL0?S#-g4CRJsz3sqs z%79Vl4~3_ytZ9$H4`|7pM{1SXG2(uZ@Wiix$^hhHSTvDmH_n#b$qMYoFy!alB2w_8 zeevGe=FAoe;fi)E(p34B!#A!C_;32U9{@$0DYqm;`_+XcYdSe4oMdQZ; z<1mQJ3H=cO^CP2EfAmaP?JRUg7(jB8JTsx4-Ej)0;DQGEsQJKX{#|&jxcB@t`oYnl z4n~tN$ffam=G?xB!_2L4Il7_|c63@M7I6kI$JjG_z>gGdH;rz;vd#>wy01q~f;t%) z2RUn2URYLk8sh;}PoES11fE^KIfFvKR_(A)zfbz2f#D;))~H?u6#gBy`_<=dxK3LI z8pQ+D!MtK*&os{H^qzGinMgJQPyU+E@p`ByC8(Mf3^%Iz@finPm={O+rgs4~Dp!plQ0H&q#M~&_l*$AAE zDCb2}pZmDTsR6i=>DwJA>0os!msR+L|As1`IUw*O^21i%q+66p1%;3F;_Zh?*m5Dd+@9Jbd;*WUuc{lHJRTkC^(bc^g~gbwe?zUH%C+8?S`+} z+G$hoQ}i$Dj-A~NiO9j&KY^D>8JJ)r6W4k`;SSl1V(PV=CSiO?*$y!98Vp$c_#GVn z-dR~8w>yqe{SaL<({hBY5rX?1bmc6h1J8YUv0!=+@mUex!>wOWo6$jVoSe<;;`Zi0 zvMv{Us~tNE;|pS>%y7Zd*^KX5>)L^ejDxcO#=V~ zt@gUsqC$aPO(Z3y<=p+U0oB7<>#uQ5@$IcbHBV_zku~2*X4#Y(T|Iym>o8GB;#vomp=L=izja9k>D6>kW>fz(0B2j zp+jAmT|s9Z#)fsO@ezs!cDyG}SX-Moi$Ju}bSZkWT+8GjCohR*{_&Nt%F%4rDDP*k z>-3k4kS#8(__tBZ{+nk!>SwnK!^?2byX53`LKrvT1}~<&&4=~8P3WYyL#5RKisgI${|D81$r-#9Hi zClY@Gnli45)y>xOKY`*L84@hXE1Q!+2koS8#Hgzxt@0Iswk z$FV;`b*IV$an2MjFIJbsAS7EhI{wrHJ*dYNpj#L_aVwR5@!dJl%1iK5PV0{b2|s{t ztkO$}9aRH}JmyWjdR^0;_Epv3>ItnFfU^g=O**DFg)NKT)kbA-?d~c*n>QQooY|_0 zemu_ee1{7x{>}v<_693q&8J?eBD_ogDRPu$!`zJVS3I1Nf&QMD92V#uA5#U;C{SHk zRP-f1^rbvuk4~WLRPkaNmx`Ew^Uw#*OyqXX5iMC&I=I@(gDY#t6Zk@=Y^>!KhZquh zb8;B6Di)j3(Zq3q$~x{84`#5AtYJhO|BX*#3`-M6=E987it-Ts`QwuGQxADgF#P}N z{D=20Ca&&co+#1)CM-OXRu(J{u!bO*3q}T{n{#*ebYgx2+gtcQysW=Zq`Pq?3|MQO zP#Z3j2L@V(Gb+~kld7f+mJiX=4wJ!=c9sfpr++&=?*hI*fiNxRJYjIz`@rL4wf^Xfg*Rdaftsc0bsy$73 ztJBM_|29E(n|VqL+3k@c=(S=mogJc^zP=jw5>!e+T+!ooh0`hPkLW~h5Pv*B8wUp-+jq$p&J z#v`IxFk&a`Hn0z8L^x=m%cKFYIOV3VSY^TCR3%xX+ykW=+tTjJa#i%+-C^N7 z@^cSF0N`~+hy6^Ixkij^lEU<*ECbP^Q>RC}!>uuek^4_I18WU_+ITiAQOPPVA&G_k z!XYQ~C%~=`q9rPLba`uQ0i5S}Hhen*c{;L;rjLFC!Sq?X2~EErOEKmN*s;Sd z0*I0xUNW`?HlU%J4|(@giPB`I-vD2&)nf|#ij*!QaK?ss=q`~~Xr?h+viCi$YM-nU zW0;uTm{y3*;sy})Z_KBNF*x6<#OaV}iMmIa_Bk%hAz!eP`vcaQQb%O#Q}WB;GZ{P7 
z)O}CQTAfvF%rbJj{Q2=MqakDKJyIOxy&2GySC~+lvTntmqOR=4&YT-cSCUI8XOpWmzm;z^& zfj+gnJ1bA1L)*jpK@;Al=n|03BbehF%ot52(!T|d(AKw39Acjh{shc*rj#**fB5oV z^1;2jj&|;;V!B8a=ebI%0klX2Hl{!J@to! zdkmG~5zi$q>Ddy{_&nq_)mOJ>iquuU`i07A3#EOkM3EW3L{k&#AbF6oj1x3%@Rjt-=xrQB+k;5Rgn-%muYaZSyJz%pp$%qz4 zL0Li2p-nF{+2;4m(^DDj;OhuyVsI#VOi~hdy&U`8Ee;+P&hUQ5&VVk;(x~tw9^vVP zI=qgo=b5OO5W7uifhy0R%WN&Ch9@s4hp_o(Cw$U$w=pcnPD;{oB8%-OKwV0_v8voI z@=)T|YKy%2RVC)#)-?D{$H-@X(@Ui6C!ifg^ecn+rXU5@ZLM~vRP|jh$m7N5ah1|2 zGzY~mC)K|aTEmInAlZz{o6cmvr?*KYmjd z{kKe!AMT!s?G&!i)KnJB5Tri|9J%Z~3+<;bfzArfkec@@8Ft)ds0t9?OJEMC(5oxtbgI_=|NijmyUGIrb#MT!!ZHuY!5a;5id$G5Knb6X*t(F8;2h||EBth8- zvDN?lgPP11W*^>gJ?NIHeA)FI3)%7-B6FBYqWi6;@M{X(#mwFvV04cyUtbie4t-~l zPPfQTONHG6KA%8Qg8nyNX?MuBgS(8(!xgRhJHjH>)k;(HX2>oILG@*#I4>&VL=c!e z|@ETm~En+qsA1@|C$$rODJzC5HXOdnT{{ z53EXsPd^2%4;sKoa4D#y)yh9>e4lj}?I&Q{wf9dK(e}3Ib8U5*EYqVY+gkSEu0HKP z{4m?cxS*(4E~E+^BaJq7){Z+BAJI5OoW)xn@jyt`k?xsz{DZ(Keaw2 z(TH;aidZVy3Njwqg=8~&Hxf5zPIh+E~)!d}=B^*O$f&Wx+SydEB@hEf@FF4PEP&Pq{4mutxy1OUjUe4xW9N@ zZe80;Q(OskE&WR9_#XvEe7|A(|5fDfbJ!-H(_DjS8rcJu*>2x`F*dfu&d})+mI@DKJw;K zspq{ze3IG#p%=`;AV*BL<|5MFL$SWqJ>O+o!U-qANHnm zw}HpMztUJoXR1lONPaLsiv!NEw+ArNRh}cs)V(I3k*es1`jV&xeWB9@1Nda9%|aHl zMTw+6?4*LFR5 zPP-jIcYJHl98VvRT&(aX5r_cVddS`^g>|vsl{)_MZGr8{DOUFeV4k)8YW)u(Yy#cQ zvT>eXt%r>SXL`lndWaCPZbk6#F@M4TjW2&)LoccNTqu62*N~1vF?0I>sTraS_=S)dIbou0ekU27hlQg1j6dKNp?+3Ig)zd&qDS5+ zEiZqR(0wD0Y_OWq4D2)^rIN1@ECD_SQoUBWB;L9?8P!Trp@X?aqi8ZVF?R?P12W6md$r(*k|l&cAw4xwgLbs zs()6Wuxz>Gs_S|zIVaCcFuk*>X9m{lV&!W$vu9hsm3)|Z$iP)+xa=HC+u7nWmLU|c zK%z-8Er-X?ct#q#)(G6ScaXe}H0<1;tG4MH0x(bPKdI1$=1=4>be@O|;AqaDTMrL` z1dp0pxRnoYSc~xVR#)7PlFoJ41yd)-gO(yMEEh9vlb0mpKznGIhv4|H{G%uK01Sx{ zj>gB$;`@#c$}ItOD){5>19luZxth`O-C|`>c0@z-KZ%X}w$4AOVu;5G<;^HI${8EE z?g<8v_Np^a)#=z_s_BTS%`8i3_%7wRohE#Q%5 z4$g~M84+Hg$&lC>dR|V{-qRbI~c3jbwvPL_>HVmW$ zsabTq6rbbQE_=eDF-tDOFFZb}*0vTHPB}dHrn~dH?LP}hbOY24F`WfP0%^(WW6Z<} zd52n>=8v9ou-M2^njx;3Tk+KQ#>ET?tzc6^msWpLn(JaxKbqR!T?;5AK|}$up?t=$mGJ`;mbbmadnupNT;Eh8+}gPTX#XeEb=7m$dp2eW%eAutT<|E7=- 
zKW?(bDJ=WvGdlD$baf9HW$CM`%F^k~$i^ zlx8TznG*($ViAytR}^yX!wdT8J-U3Rmsc?yXY*1CTsQA`!%adt?r#N%*t4Vmf~$zF zTk?KtLt^&wtj@g7BN;l7gQ+Ht1)&4>Nfz&iM%g>MMo#HkF}?^G?eVL$v-zC`=BYw5 z&ziCy{{*7GL`!f+WllZbT6s9N^gf_Xa@wq1HSjF;E8>mlci;p1B{=BurWh6^mG*&aNh* zH#SH*Q*wU$#?u}l*5X)+ZXl9A0cnz(+f}(uc9QV!y&98U$z@}Z9SeoY$zq9@<7(%Q?84b9jGXrY$D_alwvk6fg-luhg@pq`&+rb|=*Q#FZ;EOEz3iTotC z-#g5JVe(YFdkd1!UxjIND{$QgdjQdTW9Den!9p3i-aOl@Ndt$g9-#m!CK~9Gtv398 zi1sjq=nL=}Ej5WrCsZZ-_Y*I%_p6c39|x_|)N3;LAxg>dupCi9|5EYIFgV&-!i*kr0&a}sU- zZ8D*!!u~RuCqp=OVap$xcwV!{5nBdkPI{_n?2RY-Ze~-)LErkk8PrkWnjIs-zNXYU} zfGH}-`*uo^k-68jCq&qd(~xyyjemIc>H*QO)rsJ{|N98WkmI5H42xZ2?96iMX-U+T zs@hg0T^<=pYdlo;SUCQ-YGcdYxyJXxpW%*70~fJ^7E8JpIm;;w=DGod-?`JSDkQ1)Xdk` zs28KM{C66bl3%qPt16+E)nl36o~e*5sWu-#fgdEV5u8=`n^j;k5q|0MAUrZMJp9EP z<2Nkglcc{EI&3KTCAr$B<1m4qZQvLyH*G)XJ5qQ}5Z}IoF(TivqP(YbZZ*CdU0*0c z6`c|X2YZj_a6jxgkbPdjx+h7$SoAdjGMz;RA-+q*sHee4a`=lnWCCejO9VV2(K?7y zbLf?+4Lv4mQ6+P{xDOACNe|ykr#`qg`++nzbf)U*%Y2q$&pxhWRKMpm7#s=Z)9sz* zZmZA->5Y6*9L5FtMw94SK_0A1v~ls24)oP~qCkuY0M7#5`xsSSh-EXxuL(J7&n&AU zKFwX}|4PsN@!ov8smJs(7nG0)RJaSM$?mGBLz#83V3KijLCwLJ(uQ30j*UX`guLgK8C~{ z^IH0;_z|1;x`kY|Y$m2o%S#?K2MPGke4~%+sF^+yEpLi_yq57JDGCF82In8gf9*(p zSdHE2-t=cpWv`nB{jp%72k|His2d#FTMJ&J#xn$fo;Rm0tj3x+;o9K^Ez?2+;mm_EHOWd~ zW#dNym0Z+^|Ef+>80Pog>&f~Aj=8^b&8a|2|c(O1dXzEG2I3H}SNt5xu>FD49-Iv5YjFFc$VW-co57zF= zNQE2r?Yj}g*KEAowqBnX=5YGHoQM#?dkLjC zW(uq_O#Vfd-k4-K0ch01!Chc*$yDP-^m{lS3y#)l`qcp@6sgDttFHPoSqm%%sOCXy zyGtD|-Z^n;(Rb2L9GUdZNKPQ~gIzMaivBxxTR1?z0w90UjD=RC@1xNHn2qgYh zOE1}z?uHDXh#oYt+ZUb0OdBpFc_nywg=CNghwfw?>@qqya633Wy__`2UZ$ktMey0o zNhNqub(S@)@WM27pKgJ3Z&8Yc&pvu6boKuux#jRARJ_0V9G3oQ zvWpgjGmq{P)Q8GTA15JYE}eE7dF5-P@ULr?GZIX{XT;$Ehp$g>*d<28DyN?o%~q)n zwn%FpR7q>Je_N!Lt=7)x9m_;s5b+oNp~dw`GIQsby@!ninxHlxN?l(Tbx2eWt$P7% z^5uB>j(5VwtafuON%UT<9+E_cK~z4V0tw7kbP=j zQy`BQIK0cZ(m{`l9grCUjez4OsQmw`gR&QI8Q2#V{7RJnP5Gxi*1L5T0whn}im1%p zVx~Hw5V{BeW3x}X>p6V-;XsI;xmXLs?SaL`;#oRj9H7C*-}q7XTTp}?LfUPbnJ)n~ z_^m5*C_?u$b{(7wiOAK$ZsC8-OR8RGi$Q&d{iv7pI~nR<9zg(l^d=G&`g#aMzPtZu 
z4Zvx{Ql3fL7OilHfy^Q24FR){jawwuoN*@D5Xg`l0FbtSpG&#YTo$&x1R%#wqQP(9 z_>89$4PjsVfM-WSC4_5+_*7(898yugZk+kQX&vQF6OFb>WS0Q6I_ zol{Mo33rRxVCNwVC~?2bSfSzd{XM;W;@k7amG!qUPHVwg~{mTDKlSoy+@P>0ws~sR|*P0BCkkU25<%!-pUU)J6=BOSUg6=P7JBY7XD! zaL*&g+lswE7k~5-E&ZSP`Ild?X^LDq#f$3>_Pud3_zC*Drsm_HlW@bDnKn6?C)cEx z(8uir2e|O^tY>;u3a_^I3P_4=D5UaF^II~@Mc^#)bGoLR*_o0ga%);i5C_GBZNp;Z zbWo%uJ(A`<&J|o=4{Lu)oakO_KavHXs<|l$CUxZ0MLLY58~Dd{+Dy-J(P!xIW6$PQ z_gJ9`?rIZ;M0T<*em=n`^;Ex}diE_DW~W-?lR&ugp0ba~M_sLLUg72k-w`|kLYAF7 ztcbHll1%x-^rB$X;&!*ij|ho)?Mwgv}o$oRXx2AB}|^HO@DD87<>MzA?!_g@9U|^giO5ke_b!^v?~4}Bd+RcQF7nL zTIJZOlGdzhHa%b7n8Xr`#1h*-2^M+IWgee(3F+&MYliFPHeb}!B)N#msj{0Bq$_ixrC` zQ-qXRY^Mdz{Ixn7KPw^mll7dFPH6_<&QqR;;lp7Ka3mR08{JRff&i^cl#fVy!sp{5Jtg+L=3>GlWBVkKs#$ES8tz##HA-nMd4r z8IYc{L~k-*n#=~>CY&O3LdlB`9bGdM#?0!%C|wtSne=6Wp;?7v;b@9-E3AL+Dz3~N zo0?8s%AP2p2c^r({JLs(t$F4b?H|U?hV%J$?A7-NOKj=?CYhbA;gb9~0b89L9B-N+ z9&l)2hs8hoR!X1leU)JAn-t$a>}{xy6Ct0&!@Q2`FNlHuwU;3}Sy5Ir$-pg?-y$Nt zW?#UohH8BRHV4Y1FO%ykC#+w1dTpZ~18nMi1O%@~+3z}fdI#IkfBE%(jGb3&82{rq z^*Z7wgWztl!NbP}jsyKKG8eloM&BPKimbjlZD%#pBndMluOIwz?;&WDgh$+_%E7%? 
zJ6)l1FhTO#&srPo{N~n#_Ad_GZ6i-pii}+)B zoT})9MwAMD{QZo%nEEn@G_<|r7lomS0P6x&o=D;fzG)ZS`}F)xg)eOOB=6+_(pFW0 zrz$C?5t|)K7*n3CYI?VmQ13@nv8O{rLlja_jUZqP%@l?)a)R==;r6PMpf;X_dYV!F5j1d8UJ^7qYu@n^{CCOWZ&6Ahrcf$w=NkMY}MU| za&N_iMc?R&%M2skpt%oO28lCP=e5{Rz|lR-!FTP3*%G-ts*@CR74kt_H45Hz*roTd zlEScHU?C0gpC?2+XH9;AN2g)i&0K${vtv3o}W;>I2ZH5Q4 z^DyHoyYuqo?br*#J5*(>L|633ABREb49$4S9)9E0_Snh!XrwjkI9wDRl=O33ZLlc#5} zg~MlE&6|`&wDh18d7h8j&^ff*G5!PbMcnBG!K%-_v&boL z5jILFxBP7{VLwd=;J8R?8_3eDjJ;>&svKTbVo#W7$SLC)YVKe?YI-#u?q+zA|A3_q zoGtXW5*uPMZ@)>K3NC4q?eaa$`2tw|o}yM+_Vz44DKv*)8Di%6C4`dl!$Mw&U!7p4Il zv$#{i6?u0Ff|W5Q3KdtQraEKmhtD=g1S57XnnakX(VK2~;bn}-k0xbgXGZwd|eFokc-a)*fV8;#h4B5|6UukI# z4feJSf}2JT-t^)7ZUveafLaN`-0=b)UUGbAZ)%kM(kKt;m0F*RHDBMCdTZK)`EZ{$ zRuQxpNqn+pRtfvQ5CWpca_sqW^rwT!N}1$c(Fr%pB}5YbKmbf0Di~H&DlQC4V$9} zhb*BQ;VV`ANM(8)X5}6u<(9TC8RF(Oh@|x|^sPc;8Ec4`vwUa-HxbEc=ALhk|G#K^ z%eX3=t`GDgrIB`nAl)I2fOL0vD4hb*AfeLTUD6<30wO5gNK1DK0s;~eo~^Ps+|P4A zAI|$b=lZZ6F8Az-6*IGDt^W!j1JP9MyJCT>MNxEuooXk+4(FdVpS?~qHWt;dn=MqS z?XAf)U2B-SW5>ZO&2)cfMf$Ww32XDe$%^m`xVgLXvwa;w_a%=dNJ6?hy*_M@jWOAq zmD2aOpa8d`ZCGrJv=|#d7f+g3j@H*p;*Wr+lTE3`q*>@P+&3dBAXAM8++gZanjw>-lABWr~YdfHic_*e>+yQyU?Q&)?REB>@r#N*()9y|U0elDsF{WBto zUUR21EzwZvww4enZPM=V(OxIf3(E&Sf_K$FgWRv@8V({G81RtgE;#0sO_jD4J$N_d zWK+v_@KiiFWLH2ydX2ns$M6Ju-eiipAz!x@P=kT4bzQ}fu5Q!B#4UCQm}ApKxD|a$ zqX?Ivp`h*|V2q1ZJY)z%D1=dB*H-vZ&9pE9W%u3vFC)|Pjos_|xJ^3RObW-?-(~a6 z?Z5Sqt!+L^F?`MLV67sT+QFvbJjrW*5#*{JYm%~_Y=;D+geb?dDq$nuCsj9f{qBLq zCkZ*fQZNo10b`~53tkQuiov0SXrLgA>FT=QaBMcQdZSEB06i2TFw`#$H+LYfJv%>1 zZ`t9d8fMsyn*M8P!9TgK4cDuakEs%Z|0-q7W-w=k0A7B!HB|)JZI|rB{qwS}7SVRF zgKA$7sh0tbANcA8!r7i-51cmgxE3zexX3x*3J=ZX7f(6u!eTuW`$(sZBszrkMypUc znYf2yh@nrC@5g5xgMX1HOB0z z5C7}qpNkylr9WauykH&CxtXsx_+)*$7@u46&ZgZlfSMKK3AFqWMsmM<)VZ|ZJZ`ZG zHgYLCe_Jwi$`np|Zws znal%#zlpk&HKJ@Sm)bh+K_9AveGPW+34)-Vmdd*eyq5p6{5ehH7( z13OxEx!Oo{mw_Fv*w*L%;+qCRN)N$?3pmqTM+g#~8wXQTIK8^90tEd=42SUWTes`2 zz5u%aleye@riz9kMro1Tx90yrU5jI7Jcw8IFR|)gOZanH)qCu$o^9pQV=QeMU{vC 
zG16zGz^0FiYuU~{Et>Rh!G#rk>R-`#{g04T@DIY)P9!P3mscN6CQ}ItrhB1VF~x0y z4x%Ox=lRkAn)8|_NUD0RTi2IA@U*d#f5|tUUH5%S=ard{6tn=wd#z^RFTyC#&SU=} z5=Yim9kU9*E61qe**LqCIPkU8-1u>10aP^8Gt@DA*tknhKu)X5&O}nUqBG; znwY7of$Yg{_jgU_$vJN>x3hs~yVXQz$(Lj#H$M3l_^F)l8~g3n&5!(8ZQ`eNs8rg| zrYt|wDFvA%%broS$CnEd;Wjl3FCxlmt_tr+_9_?si~Brtw))KUqN*y0N4J&Qqmx6UIL5g1I0_fxa6^{+2N{ZiTygpQ4I9As>Y? zy;IIW#+sAN)OC@;?~$j0(G}a~{Za_d)BVKSb(R(8m&dQ|@cN=!+A>cjpQ+y5 z>`c+HtTSja`ik5FF zS6YT@?3A44+ydZ&juMPUHRKP|o(gp~mb!LN)jq-vUe> zME>5ZE^+;wXqKSP(!D=}fwRPcz&AX+o=e{k<*Ys+Cya+M*2=Ful`Z6?Cm5ugw8?Vw za$OBGUDvO2w%J5=4jtG0e~70ix;7ISDPp1d;#ospN>XYdmx_rsexHz~a4>uSo=|%- z{VGo9?ya8Q?!Q{RpKcsVM1(wB(9vkumzYqV6>FN_UqEXQD;s-9XM6MZ;P!4)*SfW< zbJ9Zp&=wNL4b*WK6Bpz)%DLg-u<``E_Gnb^Q`I}qhiej9gYT`%bTsdrmp6W2?^Nq* z#Hy$r$i0L9%{>%s8DjZ6{xCqx3J&QVtrf4j0(p|0)svL^OrD*v$q2KSw&?UpV}0;V z(`$(A`9F=gSly97z-IlZV>7Y4&DxTjIr2cZb$B%o(aG98LaXB!P(BZV{L=NyV z{yml72WWg&8FgQ$O{dj-H|3e(}Xesp28maJ(af2!)orH&P?A68RN zc%gd8+_?8(llne{UU=@0`#1I!u>=|R1BWev0k*r=j1-*=ch?$WXlWAle4bwXgUrnrHL|DZO^mat6^Zw-nnXrU-Vx^ zK?`q5VaF={Q-}Cx#3i|6b%ME%r3SZ$j2=L>F9RCn8V`09TB|4VsosuFD?Y)Fr$x`4 ztkzr%SSr~VcDq}Y)$SN8xQQ~%$UpA}V;cXS6{`}2jQ;qClO{B2=lFso{35Tx*&4MrOH+>5i8IMhh0^XDH zU^gy7JBB=ZXO5aqzUUWVij3s`o~Ph`u7$gp|E8Z<1MHxf9T|0u;XHfu%I9H{s6VXm z57>&W8AQzQ2`}|J6sI!VKGq%D*2$HDWZ62T@lIkwyZJRB%`Gw@^@LmmJq|jv!q;R| zYtxTPL)Le=t22kB;i<|_9+7PN4htnQctWc_EH3XfT)WF_ksm-ylM+el%J3*HmR==#N`0eSb+Hjxm1iy~Ih! 
zV(A>IcCl{BB-G)nt_zV@t!x9GY#H1=uXm?-SsNY8&_>*=0@m|Q6_;zCrV@pTAb0UHeG4rUhW%l>&#+vm}-We3` z%=KtbKs?t)BjKNJ;j4{{T!GAcFPPgi$U$&~+CIilS%ZQOa1F#KKe(sjfU>my7YEACdG- zs9*RbG|QPQ%-#gPvdU5(h(@bdXOfGP{4R}{t!!W#>3o8B0zP_QlkuYlRy^yj-*1>0 zd{qcZ9lEbjaA+oE)zuMWDf0+$$s-?8N>f}I@Re`DXbSxs58&nV9mZC2cT08Q`_JO6 zD-^~R7o$^4%RHmyBzcKtc-kWT9V+N3(7SHNPKaEtY`IbQtHoiB+xrJ;C*1K{l-Uk|e0Pz-1hPD%itU0ss?>{7{92kLOoYu(c?F-f_!rQTep>c@ z6NCTOD33Rm?2dl{J)zvtjs%su49N_9)v{!C`n;m|9E+85<>6`G^Msl?hqscuNl2%N z3fH1?@0x&YA=@vdNehd8Q&i#>Oi&+7ckmr>%xfC>6ohGI{oxQiYYc;p9Kjdn zIOK(>rhzaVHp&fb6R$eM-ebX+;|La^lm_3~TIP<~_U#1Oo)lAmjPYx!hgbZAq`ZfrrgHOg>AfoK-B%}Jp}cz}LFfUuWCqb%+q-7@)XKw`aTBF? z9{(;gSqF7&a6`=s{pr}I0oYlSKVNi(v!q4G;RWYoA$%wCdL>H&)pKy9dV_^TGDIn0 z-0Ra1e0R@uYV7`-DZazE!K`J?ic}>*2aX@$#mwC~OmkLXRR2{8VNqi?I*z2;T@@2y z;Z+UpKa0p^u;f2;zR#gTYDgm=JZ=V*5wizrTq}*X3ZvPciT1@$tY_Pt#lyB zjH+U9LVO39EdLeNB?yHsAXzH9oaOtL*41RwvAcGOhm$Yf0_rN*kr*&B2~%dKWI zN!C?3FB!^UD8z4L!7VLuJh(zbds5xCIzKW4ItCyeM*}=wF-gyb= zEdJpd0Jwc8!|ASkD#|gVaL|884*MXm z1tNS_+Yok;M&PS_!NxIX?Fd@qfkg!X0?}Jbm*>Y!Sz96^(0FZN3Jld2*JqYI~>2lm1jK$=7*rYuhP zZ+3?>$a$)D16){_BpWL}@X!%|whKe-^!+o8-yg>Po(o`1ouJN=&QWTJntlQR zaF#KF_#tLLnM-1ck3I= zx9{&(ZapZ<0SPH1mtE!=Zu$ZXw16UXH}09#aRH*!@w~J(JceF|&5Pn%^08KuYODL=}lVFGJYgK>XhT88?=#Ejdpk!8#}LSE`?U6lIKO zur8~!xM2VEir=|vOJ3`lnj)9+mq=opMpX@>c$|SGFcKetDQd3sWR$1~buiba|DO;M z*Yi=P{dOx|DD8ZD8`0{gxe|@Qabs&5fEt<7oIHj+^r5Vxs5;i)!|wj_oENI@F71ds zRy3&au)LGuvaJ}TYVcFSrf=;YX8%98T@GOY(+57k@yFx^YcWB=_ouqD$aMU5KHDxA z$!CGln2XQL$-bDC=R3nX?pc@6!~3|3?S!nbM!>GDLtfH^s5p&F%Jpt2ePc>6UK+*@ zm@u)D(a?-0p63g*25cfaYw~kW=!sHEN!tGmMWoK(k1-;sgc|qgMTR*R4!i|bW6!&w z5|@#x#zTtF%E*P*=P2lp>3wXtj|YHscFj@Vmyb?rC(y=^2D22rzB&8z00eYy>Z3t2 zxQ*U+Axp;OYh32e>)K~$UG5JO+XbLC@K9J97VB^}@L7fQUO;LGCKndE&64Lou(j+i zbXP;Ut8(#5@ZKyZw@0|4eOMWPRFgNErau&r^a*)?FCJHU&j&CZ6Pv&M+XFkt56TW7 z;f5wpsUsefUkLSc(s|2?SsYU{i`4FJf`_39q!C-*OsuE%`& z(jmcSN2b&nwjJ*fy#hiOx>ErZbFT~08)PFKyDEAe9V}4Rg)fa|+3C2n#R7o$m z`yS6Jd|%JeTWT46YtB^<0tR02#Re9}^-{_cKl>Na6uNB%A;~c-y9IAlvWXeEV;M+3 
zFB|MUj;Mzy8klPx9B};utiJHUrufb{maM}E$&8*4cPpli66$LRWtRDMLkWlk_pVZ4 zrTgEgT3w`qF)toc?yvhabPVR|*@NBtA#+0cg(^saalR@zt$#h{v`+t}9v%7p#4s6^ z8TGSOao1Tvw@hC!{YC2aV-&jPXwXK&nmSo?%h@h;5YoN9=2;~x|DFUK+-_7FbUfID zsM3K9&!SeD8}p3BHt4FPy+t$ljqh&$GxQIxXZiy>j<)vEvM_MSDAxG~Ni@qMngL+Ba}i=KEquE^h4~Bi#@!QW!CN8ZC zxBRYaF*6TP0E(lxHH`I4{~B<+*UR&`N3(=DXsZFw@~Xt9!>Q*wP1Dm)T7=J`ilC4k z;$wMBO!;L`In_8Rb`{AsR1&n43s;n;d)id6msVmFRyU*RmvjSe!l>z?Exv)yQT-xp z?Z>C6Rt<$tUZXy1N}cWBUc1;f|TgdA|9b98E}f79z5*A=2?w z0YK<+MCo;?#nx0D*khwIr>h=hYib!-Wqa@lq8?FR5});aNnNEh_N_mIyu{3Vsw^B6p6d^WCHS)hh444eN_ z$~5m0>p+i_MjerOwqCUIQe)s| zyvS_bwi5c5pGUdZ?6K@EUg}zCixJu>7sk1N$@jLHd~)O2>RX)!z?MI4mM6k}CRdwP znZw*J%+Nj**R8I1-7OuPo5d6=k@%vp&n=1@V@Y);w56Mjb&$DzaCo=F_P&SoGnT;% z;Ev5mH*2AK0{{h)cXj=_Ox_bJmF{jW$)_u-xOwfyM#(tV@*N?PU^DkDsSQNKigy9G zG|E~jb=!KKgB-I1cgn~1HvX?<4o_98wFx8dMXX`rEj<7B;9m1hiVt5piT^lRbOIzJ z1GGOTIayT?lG>~BaC`gHg6;5-DfOx^sLii*Yxs?{p-Ro&<_2I2MuDAHZMuc}gm0Xr zv%A70V;Db}w$Qi$yF18&!f9fZa-FwwS_fr&)FmXSBjNA*3^S}5qgl+$>`vwHP?s6` z+69F1$eUi?l8u6|dUuF;^MA>+7xL@4tH=4YKk}^rd`Os;S-eE3xHcH0v5btA)l~)a4g2%L-yQ4H1*g}qnpP*8G(U8D=PmB zU98|9zn!jB;3m5uB}n1xp~BWMva( zpG(qMM^*Ve9(^Ec;)>jh{BhT3E@AL16{2{|fhbI)KNElJHW(amt5>HB6mRRwq>W)0 z_OL4{2TVu=1&yq{yZL44s9ucp(#_}Vi!oaK-zNr<+UhKbA8c-Is@)G}){4}pEWnacKN-i65f&@DqH8MMCg3PX?XskC_OKp`jh9dugZ z)^_S=W+QR68LWiTRoWo^H8vOxU&RJ!AK9U#K?3OyM$VRz-#7B%$G48PrQ7j;><&sl zGhHBFI5*5X>o3>)>d)6|@KcDC42(DXCy!+MP4({ynWi{1oC(6|!gx$h`IsOM`v@ji zBg(zbPLXF9rY@0T@K$hpf60j^_EXF=G7BN(mMazS$E2XQ$(OxUK3g4ceDZ|qvjDgr z9a}H@>LNIH_Z8N1k@wh1Dstr5wJ~w=v|&9m()!OayH;Vgg@oLTX>An?>)8d>Zl(vL z3-ue#gPDGZr0mvy-SoOqu0Zo)Hf>7Asi54ZG2t?%II4R^>RlP}g?r~6%Xn`gd`8uT z$MHRC$AQ1?6-~Qhu4MgB_}W)hQl+h;&qvA|aNhW2*(02QnT?prR^`+1QPYc`X0xJE z_R=zGa=g;AY@FDRLH(iP;wyKLtwM-YzcQl@jl&3FqCQJe9G}V9xEtvRCE(#ipdTlN zhSKO1ZXJrttez85c-^}%wzL!->`nb??C<&7C17)b^S_;dyYAoC&Mh1jlVKcMcVctW zRPiF&0N*6h4&H>2A4Rkd^lZ|^9bO&j?oscwp;H5Y3JV8iuAFaSw3~qowz;xj@0;UF zGb|+!v%eql&G`4ZMBax-na8c+i=aYI9OLO_BmV`U4S|tKcO%orak&b{^gx-#urL0d 
zi|PEjnz3~0>-&w7FJ}q1E#47ek9i5mH<>L5f4%VEk0Y@RN`8)+lTcYrB6nKEr+E1&<=s1 zBS%oKwdJrTo(kKT;4iGi@dPV{ts=SDLlA$c{N>VmD+Mjp@>Mc8YmQ7p}?zqs=slOuQMwNX>X zqtuq(E7s|*9-wrckNpmO|0wVsXA5gr!1phs0*;U7C%DXcqFDXjn7N25>4?2NErM4o z&Cgm^8%?$y&*~pQ;@?qY=zjd{){=4yi*o-)Hz~OBXou|%JfR6w%uH;m-_X&OL|mQ_ zlcvI22#Y(Dg6{^v4hc8sdHgmE5elQQ4`PNM?EIHuA$dWWQjrcv!0D}X{_J1Pqb-Nj z9a2>EOqt;V$AjQ(k!kbK`f&s4oDOemcf>LBxll|xH=>mjS2?_$FgqbP2aisf#6u;g zWse{T@dNO$dK4q|7PgOh#SBuhg{2RvC*h46H;1yPr^J0Wce-1hQW=k7arQ%?<%!oX=lz_?IQlA%GR02cN(x5GGp z{5v4TY9<0X!c1rKhH*^($LO~Zq7l1ytYxeOpTB9SBAg|ns4S%`yzC8F@4i!cJr#fc z`Tx*i>$b=5?cf?A0I00LNrn247#iRGk&%4E_h^SK$on^*yS^%5{F=l&v%}}ejzRZc zGh;U79RJSN3;dr;$)Z@^i2=RW_=U?apT1dep`!Sa+`{Oi*NfSzak6yE7xy9@S2 zx-R_==^KKZt2ifj>$<_n0#BMPoBsT%elWin8zMgxhy~q9BjetX%lXA}z+h)3gCd&w zd#@`8A>JndOx61vV_PJXDX*sTO!%FCQjfpae>cQ{xiPk(meENN>R8L35NEhA-n|O| z^{3cw=z??D>oJTLWm-4oKebV6Z!W|U%G(kfHis5K?cK!yZ11AzlDvFP>RQOyKMx2u zt^O%d%#zk56(1K|#&Aw^QTgG=;u(zn0;(k53*D)zEakGe91qw)avR<$v6fE}bbi{< z3R6Y;zq!31AP#@DgjR0L*+({s0W}|^zwy0pHh|ndChKW7@zV`6OXs{4{InvkPDzIZ zhu}1;gPi>U%ewTnB-F|jqaiQ;p_iFbW=YixitLN#BeWGj1`R%{uDYt**8sJCn7;L) zG65p0V9IiGXNekPys56|AqV%|lfR*UmgRZsgc^BcX1fF#kA| z%PyzASCG8VzJ;p=800N62ywactPZ2FQDNPuI_uPo-gpoe7I_xgKZ@_Uwf+L_r#KlT ziGdJ70Y^v1PxnkXfoR&{c*f?6{H%>CV`n8Sw==B9FmbQ(qC4(x)g#7QIsXBvPC>ss zV@XJ#?JfOS&x1l+2EQca0Wr0VG;J9;Doy$GI%IMfCT^wm^KgXp6w5SM`B2xbu(vEr zsV~NfEeS`>*sph>9aLoKD#*K*;8UX!eS@%t!~&Tm-~J&BgIC&Zs96TSFcON0sUlo4 zVdacXVS^X-27E1wpI@w-*RTjPF8O~3dp>2KK_?yVHD|_9$+0Y-U*Ysq3~@28ZHd0z zB@nh;Mbqg^p0q8G1Xsat4K<@UN$8r=(8uYGQk_HI6?CDu|o&*+29C;oBkr2d5Roi58l&<`4h{au#ijL#q7s{;sxjyTZMhIY(~{mK|LmQtf- zdn~+^%3=qx8eiLT)A0TRnrcEL$mQsdA~J2FX3wASUO?DG6v`6fz6b-Qr8+6KRyqQ~ zxtjQ^$Is$wQIZ0lQ}>T#Yp>7_@$J45A%T*_eTS&#TV9>{qxf(?-uXxR|iA_StF zqcbZL;)llGGhCO>zc~0j)C7)D5IYTCB`Id&L$`7&^kP#2OlBT8Hl9FaEEB5^$$7pIFvy+38z`_VI3M4CQw*aO83?7nifm1l(9BQU(8eZN+OOA!0>;FO%f~jeE zy1?YePC>z?oaW8A45A=`XkGxZntq)~Uz=Aa`-`ni_=^D)s`=O+yfjxzu=Cf`q^YDE z^aw-@b@R!kW&+-q@!CI5?feI`+5rOIz*cO(=wY>sd@k}Q4ocXkZkqD-svIkCYVtIR 
z&(_@U?H)n|2v;8ro|3bVum#lu-sQ-;g2J;av0{bY5i5QzJiQ0}E8nA|L+GJl(#%(2 z@R6fK0Yon{s2a=u2xlJWFAk@n8l~DG-DuBhsglC28j?eVW!MFmKV+uG5{RjUanwvC?(*>;ek7eO9x88p z!R!ooO#tB_fLQ}UO4dnV$d4ycbqbc>$(|owj^Ukef+_% zjB@mJ2=4haCN5BT;$WLj$ng-BLiUqZ?q6FV!)EEhW{)-mNyn0Ss)Xtz&R zNh@>-^L@7n!1Yd3xl>i2iShSltBE`{eDUV9&ASM<^>_yU55cStpbG`B&COjB%#AL5 zP}Hh3G$zCGo`X%=Ag4D4!E18!Ozy*SNLZ3#ci5NEgeZd75Rr_6l3V!+j7O|rX=Y)Z zg^NY@?~0jz<&BpQ=%RgC&!E)Jy$iXUf<1!jWWHjq+0E(Zyw}mv>5*zL3?6YPv=QQ* zDCMPZc)L88v);dTc?AZb1@1)GHsCtTo8wGz6d~z38G~@dYOz(lSuK(`@!E~D|6Su^ zikoW*YO9vhKxh-1frv9$)5-wX2igKy3Nm@$pJECY0Ov(0ydm09gs zDu!uyDRfd*WDdWZC!^>?b=e01HWbr%P z^JG&{8EAro(}jqiVAuqcq?bQ$gd>+Mu%0dRC1Sc2kasxYocDvZ!MaUyy>NyOx+F)UwC-~hwkPL1B`f!&i%_(ai96r-9vpzLEM?u(OIeh3minFB*lY8? zqUt@0s7GoxpQHVK$Yj0asnKJU>kKbfW*yzu)rw=&H2n4fp0tQm@6o*EMVK=P`!S!v zn$v%z;4_gr`sNw`nPnY~Qv7LL4|cG)lY+Z#qL7Rb@DC_-%ZGlIKPM_{Hn%tENhtN^1Z+HrwCa(AyrB9F}qb$1J6355}hulx;JrnZC&K z1CV_}uJ`%a+W9ig6ZbIaAoOfw-}BD8MJmVmh8yao5cZc_r2ENFW;2^A8_4gLs+=ON zEq-yxP|ot{XJQk4)&KZy{&YACr*V&@HI?DMP+AIM!LYV)qm>n~gEa#f>|_&D3%EH0 zzz&A=1pJg%36Tqz7B=|~+VDwPh(eHn8HlDBt=-iWx}H$jw#A!8d^vQnO}pEIH^t#$ z5ye3h#lBxFet@3iUP_OcCKa^@GXF2&v)1e|m*v~ReS)99R25j%kfDL$O~kZ|7My;p zyqCN{JzMw;s{?}#Wlpr)b=d6vtDW<##J;b{pn@dh3F;tqF%aB{e;X$GsEfen$X>6nI zRipt~3FlF{XD>JP*9a*;xj*%n?-9i!6($?>0l_Dpbq`Dw?=6C&uq=PM5`{qq5)L845^&Qd{U5lJ9kYXiHFk?B zr%$%*Af%s53(pQUD1wyp#NDx?XHDE!UD4$JY8b#itSHin;&;l;mG0=R=@W)5>8cs* za{5OmxbuoMwr4NY_^eGwLrw2(wUalm(T3IVo4_){Fs(xb5^ZoIv5?7rop+JR`)08w zCE}}@*61(nF(uZKBY;toiOOGIip5xI_ouVc_Bx^*am|zFUwMqRc_=oOU(WZ@@GFq; zQS62$TT7T`xnfos{9^8_?&22*r;oF!vTpeKHcer07?5?KJ%mj(=6P#G)y+U3`pTsUR7-E71{Ty+q{IH2R@#in*wV$tdg^(upSVx$2G79`^T0cBINFD$%DkFX=ji@MI=4n1 zt7!T`%62QtMAInO^b>E{hN;$+YZU|M2k|F~4Ex)YJK1lFK($ZVgX2cS-Vu=$I@+i9 zAh$HeXCHoWB{=szSfpx#(0nO*dgKQ}+5Ij(czxvA@9%#bC$goBN&>2LEKjFOB^jS# z!{MP5<>8Jd(#gtCbt`9vc2o32I4@Wc;qozEq>be9U{K%RmF^|&V*9k}Gqs-BLgfXl zG%?o`_GT3rA0f!*5A6LTx1yCG3)ZXn76Ja2c8%E-{A!fp60Z2*#P31xpvnl@23|Ua zdVte?9`+ZY&%ynVx`5IXu%8XYb)^GZ 
z`kjHUnvZwnXVPrFfhEP~$j7(N)_?j-xZ8@V;8AW1hDssl$0oU}<2deVcU6S72+mJ} z`cr(Aq&st0pYZBdGTYy&>cP+dI};SzJHBzuYQ7#DBWMYc6aZ>ZZvdCnOAy~@RA27D zP(+pa=PJ|@NZ?-dxDu;J2nB3?Vbk5SK4*U+d#{ZO;%N*B%v>;gFPxF>Ul&LpfL*Pi z1}H?m2ab|ERD+~O0EU?l(=XtQUTy?d|Ik{?Ici+geNq5c_O_0%L!WjTnPE@s?Q9jg z1I@AF`DSdqiq<0|HL1r~&$(V}Jv>ouIGQoWgWEtt({}>t$mtSYZA?C|hw3p$xTy{% zCt9`L3OG!@<1}|CBRDVBuF~lOJ5$(IP6qiz?xMc0d2h}Bgi;f)BMbJm>_d;5pjMel zif4E?=*=h^dN6yHN7IUasS()gP@+-qpon@p` zEo73iwQHhNgX_c|H4fJ_#;Tv*P-lU~NJIcx&sT3^Qfw*;kjJzt0( zmsELB7ugQ>s_sjBna(}zrY5F1SRvp31*ESGkEqak7^NmkKS^`$*1``?h}()T8POq_ z71Q+n<_jQxsDLJ@5~f@i zIFV@fGtuu0N?*8dL#+$negUx$pe2}n0ALn?&wx`r5o^c|1)t7t%Ku1>iV_=0;BEQ1 z9IICdr400Z_>D6Cb^U@oB7p$%Ege+xA7>8;-Q2guM}8K})|Xj|YQHvf z3C;VV*x;Zy5>O2ws=aNF@??7?#wr0R!QuX{2A??1JmOr6(^5T&G3*jsYLR}KQU65> zn0#mPUV5TN;c7_F&9a2zLu^>nG(}HhT7i;;q1%zv5RWa=jlmDirf?&z^C}{$H^RN& zZgUG9hHK_>v-cIjXjGYO}+1hI^NnYMoEy=2^)372v{m zSu=;1W-lh|j?;`z#WYU{OcLbt(5`EI`Yy2Xwc&RtM*dTM$Owc1T`z9#+TO8ZKHgW) zU8%$v>_b88=1w>2u+SEAY1bimmqA~flFM|J-&`vcH@ z{Sw41%D7fS1A5ytiFv5%;!e@!HBUAP)yQ``)f@+neI%u>CkzY_XZ%pPw5S&ZjzB7^ zzI*3hq0OrU)udCuUG7DS#vPN*GqKB=k%8J#^rpvSglC$LIVTK3f%nk;p;HHs<*)A` zKgrhUj$1Po^h1RIAhSMhiZm{6c>|m!=ewe4H^UQ1hNe5~|JD50B0?@N4bys*=fFCa z?Xp1Kv~x@2t4=UxN%1#4n)%0=P)?Nso7Wu$XO}lrn0e4WT|#Tpt;P0sesqo4u#F%y zP~>Gh_AXzs3#j}8`fct5)-yHwx!XE4KIUb_ka>2fh=M5#{+6tPo14NckR8JZhgc;` z=iH^sv63=kqTS0^9pgdK0pX=KCrfLA0j4X~}1oLG02${o62+0dXt)_J)U3dDumh zvPqwgt0JCliOSea@zd^F$&Lc?Byt*7#UJXcF~^2Jn-rkVyQ(y}xjn0&qc%TNiIpm3 z)~zl_`_#Yw=%QmM$>CeGr3gFA;}0+swbGe>)y*V(S_0GRyld(E zCmwA>zOmyKm)!?}X`{P(y(b{X^0?mCVH}jZda|nC6RjGS51{RR;yQ`8n3Z(On(sSI@r|b(&_Nj_ee(*Po0o>JA{QO3;7SMDKSV=vTLUL=pQZ5;LYT3col#17hJV;iC% zXB#5xULdZVwsf0*YDME+G5PIXKj~LK)y#q1Va9gZXN?{WYomA`48)x`f}*n<~_8o1{J03F~_- z?)^Me$Q-IdIx?VmycLAUP-Ps+qM>})tMNDqB>*Xl0=I`(p7+o-1zm=mf6c-`a+iqp z7Dz7=!?obE{nY~!JU{~vF6m5mF+A;v`4p^RW45xl&I{A@Wqihe#N?{=a}%jn7puis zP{{GEyKPf$77-F`ZlapqGVU@c{I}ZNBE-mhJA7}Blkri=QaFlRvU^LtOlRZym=5=Y zdXJmcJ^>7V3IX4}r8V0Np4cmMvGb&K=@`pIiO6Zm-YxrZA>v@E!CG+YHfc_Lc7kfd 
zpg}Z`s>r^YCpn=UH0DIiKF#Ak-hv-=yzkGFI*RSAf3q#~DzN|i7|>H-ZrOW!12HAvYw zIjg(*YuFLC(Ij~L1ccc~6y(>6Cv^7a-n9;obUs!KP%@D)iH}a6Hxttar*px9Dt#_& zfb$!)S!{%tQ;+TBb;bg0`J)7pnFG12G~>3d!0}kivOIC-+1X5(_ZmpSEr1_Z>ce}| zVrBHkY;H-f{4Evvx1y8DkQw@S882ZHZlV^R@IISQuq{inUE0lpE{2F7gb+@OzIpg% zj;dPnXTLF`H+|^mfsN-^wMA4`|5^lF!jimc%tU)*1y2#$RsPbOWL*z$^IsjK#^*)D zXUz1;!OuvD#kNCxbokYB_t@*>-8Tq=Z0ldQ;3$O2?FdCU-TaCr_5i zFG*Oa5*-KQu>P&(7*|{o3JX?rbadh--?N)O+{E=79l2b05fAor$XNU|kq<=>rAs5%d}wNO!tVdF@SPH@L~Py2qOZ9m}WM^?$}#7QQm1TRj#?=EP0 zSn8*0uoog4ui@^zutCjrC)(fa64~WfZ2*I%fZ<8zlmEA4yW!R5?^=WquXio z#;Y%iX<8lFXyUPxWt?bnBdV{tW;otnJc{p%$hmUw6nzctCkhK9;!kD1MsS8rJafp~ zD!0dqYZ`XK4dmq%Nl^+IoZYYdBKgDHY^8;%OeB}tr8lkuVB386KfJT~1?Z22U%amS zmbu-i+!BQ{ek((#Q)`_o{afoIJ=;g@uOj`dbP_bS74w(1d6!Zg`>S`A9~|B-Q+6E+ zTtffhYZL}KwR=N`Z_hUkE8z!dT=w$SDmf=G5H zaQ@d$lg1p2>D?+?`zI@)6H#xGOUcfgHWJQfuX-1-yLPW%1-dJ!uzw99VI~n}^fo!P zvhB#n-f+Nq-h0@={+|EBL$Lc!GR*FKWgWyCv-3}ZRt!QDVGC{#u`}Q^?hl>tG>nBt z&|{$QvuYTs4p8I!V|?R-Be)aw@WMmBhG&V1zJuamR>IXgfRV;_MsU(82TZZeJO1C^ zoB!O6vs)$pGkaC(7rvfZBm|Hdgzz5BN(D5AjF}n-Z;nY#+jHcrw--xfI8U8kcd}bp z4<$G&^d^>t#hGj`Ju`Ha5m38>T=rfjW-HM)hD6pJ^*n`|fCuxS7m0&+)amD3s@O+sZlY9FCU&&V z2O-;sBeCznRy%}n@L?O@H~Wc6MZ14H)gwyX9)>FO$nOkO%PgORTx#QG;iS8bsAIvQ z$mlJNGbTRn;~B4TerBHIZ7dUiDLkIiE`D1)Ylb&#sRon!a8)v``WL{S+26-^M1CnT z`FekAa$_}fE28?`+oEc9wQP@FN1dIp(P_2QVr|{WIbX%cAbsKZJb zeFq(p7$-Q9W|J?#?)F!Q{>$ePuDm8|E{$!!X@t-6A-!Pz0l%f@#e*O{dV!pm#j6hw zwT<>HTN^Cc&j%Q5l*-3iX3GTc+>cv zD)4bvy_(f`1sjJS+dsPkzpZ9s7-&y>GIEwEKdimPAqKmkkdsOGkdY`GMl~7w8w_Dx z1sLm%S~{s^o8vJjRn<|D>8mo=Rn?E=rgw^3lD~NxDsrDm6i!2S#+FeICw{qqEDno5 z=De71cSuEM@9NWG?GFnG1jzkl@dbege%urH%RceiR*4n9Sk=$?$-i$c2PP9b>8xd?&0Y zx%&_g?O(K^d=k2=xcNEh={$bDIwK*ri^dO;x8f89IIMDygwxti-5pSr4EIjbL>l0!tdkSp3zaydalRRW%6|8ueXPH+Qt-InuC$fw_bEnV{$a@Ya{D z>RKUzu4w}E_%_CPJ%MOxs#F@`*zf|K;i5D6CQQ4WEU7{0FQ4*Gzw%@(Fzt=Zkf?@( zkH^M6?0POWBy#xOfWPJ6R0w-+Lb#-&S(vQJ58Ot!$L9xJAg8#zS&5wb77+^YZO{bx zXVD(D52DC3c|YX{euj_ys?&J`V>PS1@$izpD;hE~xH-(5z{gI=TmBz!rtzp$lw!{) 
z1F{nPsYj6r-aI9{=L+gd*FmQfk?k}y69WUJ{rBpJk#Z}U_T(IuS#4TEajZOb&DbEm zJ{)1coSbM_GFg69mC8iU0Tx#BIYKX_O}IG>XrAo@==u_XdoaGq6U_K2fxbw${1L|T zT|d6CJlYWxF)$Q#f^&Kh9EIjTjuxD4{724gA(juRbSEy0H(KW-R|UVFh$Vww+*S6Uzeqp|5qY`?y?QZT}>@Zfh0Z4ley zd5Ylg91?c{NA@$jtJXD2d+6%oyF_%qlGm0aFLCr>HY_!W$E011AZ*TYuNdrKlfKZc zw}^B8v(^%Jr$Y9c{RYsJegU!;Bk4oH|e9*^?FczIgt=Dd}! zuINGEJ{*JXtahJ&&+q?!nDIi!tcqyw5f~cq>rw4TCTF;(AXUm;7I9?Oeg9z_H*NKv zrI4xt^yscqZ%`l$h<9N@etwjpB`LH*H-*U2ax72gyqqm44(l`=r9;sBk*9w!|3Q6M zU;SVT$8_Jwhxc6@<5eBcti)xDhsC6U{bxVx>zy3Wt?C{)=>HLhy^0KRj*g#bf7H;*J=-V`%f7S0uh!| zP<+R;Kxv?Vkr-BR8cT=56~-F!SH2I-PeX%2Dd1}W(V0Rfe6kd#Kck#4-> zSBYP}pZ9a`oj-J+VP^MUYtPziJ5O}2E=@NdO z^J!;uTUJ8re95{;?r1?^z>^N}>#&}`jkzkrECqT82k4Mf5N{#_H(z?$>dWk5nl!=E zg=H?)M#v)eOnVzy{YCiZp)Jez5y5LHGOGP<`-zXSG;9ZE4hv%OGQcm!z$=h zZugW(#mnl2oGd5LsTx0$$*QLW=)n}wZ~uquVc5Gv%@!@cYV7HRfLZgCN=2V~32g(J zE&u6&q4j0#aB|2VYW!tr0vEAVeoaBmUTMF2)$ox@C>%UYHn;|E+BzyFrQ?MtI%D); z8DsrKAt^vMD=8U!0=a0l-cB9Mog!E95;K(mc+L+y4x09UffhJrpc6Fit=9$}kW8{qTtFn|iz?JRM}Cm+o_wc4h_^7Y?}P6z-zZBMW9V$>Z|$hl+fM|r znS(=ZqBYSi21pfSm#CV)SIIS4d`@B+!<)H{E}qc0)}qp5pX0MV{Zs6Dm<2eVce=fM6%Aw^TEl=|if6Y1c7Au83aAD>N@}6Zj8Z{4On+&HHjyTn zR}@bpO0pG!lk^E60{}Y}(lm3Oy?>$i{5=aED*9cP7;sh4_z`5n(DE}VBI0TI zo5&S)rjW6bXM+#?>Hn8tCp!PS-nC3)g+?=~OqnQYJeYedPt4-4!_Ldq%jLINTS1xtsMMV#iHoyE6KyTrQMDM#s)#!s zlh=C?G;jt_2n-Xh(hUynKWiMoqD74@`gQ_28N*MJ*n2mjF_z5kz0u<62W>0}f*#Pi zjqvGf6Cepoy!+h?b@5Mjnf%=^R*6^1VuVp{gMwMgZ|1~5g+TuV*(kuuALpizPN*Z= zW%G{=pMRqL5q9Xi15-*X@YpojVpcOO_A39wk&9{z{yN@t+E>x%du3=|_CxZeE=~-y$~_t+(3et-FuR#C!VI+ z{y-LBn-@UP<;m67B2Fej3Lb~IAec0;cBGo8u>VHC?B=utZYj3-Oh&)iS%OM@ldCTw z^IZ_Ja>Pv~9<0uXKEi}Dkt-)gyku)*b=!ZI%uc%~?S4Y!23PivRJ+wl<#uhdEj+3$ ziK7_nJsG46UA50x_F+_37&iNx7I>==AL&9(U&ZBdz*~KpUPHbq0YqN|tvjH3eacVe zpMx3yAYZ3hqfJj42PaS1ULnEng4Pai!Q-No{tE3MxK$A7kwc5!7YmrRboTdhkGfk_ zKM-fGA5RFSI99+EiD0hUa~Ae5?3u+ks66Qq_ehaDEb+y8>-FWMtVO_L;{fZ^*}r$+ zJev>Lbnui74@xd(M%jqvnbZy83#QC+&T1o5ZS!h1b6tDMIQS%eTUMa3q6BA7!s}+D zK~00!!wWx1bcTD;kg?T%Zhw}3y?Ao_+>`k`zsT0W+o>R$AS;)n%!`*QByIYN1tQ0d 
z9MsFZW^Y-yggYI@##Rd~HBdkqwd`>$mwuE&#Yw}sVEu#Cc}+N!Hcvj~EuIO2vd%Db z0GC3QNYNgo#4E(Rxe<4z9K5Q+cY28B|Kmuy-kxtGFr8R$*emS$x1W~Vvw|S>e+g>) zPbK7G#j3{eemL+KXmf(eqtky3BYXg{yD7J1l*`tcbms~7Q0KJ^_a(noz3mVHRgNY2 zJBxn}$ZEn-(#&ojc}f)DV`P{llar12-N2SSHmbo@Pc$n{?|-rOLz>T zRy`9YbQT1+ZZmn}Jj}NE)}kUx7Rkcil6_g6iIi2g!+Gxg1sFE{zs@zD>>Hf^@G(kp zPCwJNIvr1#YAhP=Csm6&;$Gu%-oc*jOgFLu@kQL6M<%Cf9V6_`t^T($GMO(8aRcJT z_>Lte_c^~S#fFGf0BpR1;2wdar-aN;R=28OL!5hh^$|OIaOtiIll2f}xI}{Tqp}AtJ z`Gpe?1mZi&T=!Dm(NSlAU-Y$7`-RYSMuFOtR9v+*sZtD3LpBR_RFv`i66=QeFu>sH%$k~-q=*1yr2pAG9ADN8Lc$Gu-l2NX@6W?q zcnL%B4ba4Iko(SvowZbmjwwsK$PI7=h#NrE=itTTCg&1o*I)TyBu?%NeAprdzr4t9 zqXEAE-jh5&xd(dsd+q$MOP&6X=U3N1f}TD0g|gZz2+tiA>>=8EP2dQzMvvg%w6GVeycBg`({YV$Tf%Fi zY29ooZ`;;ANsThQaztba)(oDcuSF_6sy=YH=;xhj8l9 zZtdKqH;iAFFzEsUNdZ=pyNl@Uz-#{8k8=2_5})2eCUyfy#@1T^n#otMX5+kS1e`9_ zoo2E@P+dj7ckld3Jt#KlnLaez1uqz@0;HD9J05H&?e?s+zMzU)lv8D88_F}oY6QAB z$o?FhLw0cw=KjgyF%x1Kb_=q>MEN5ZEJE+2cbTz@564mJJDI#mOd|~5HoO#gA4U^uC1$CAXC6ZYsm5p9- zq^X#C!sZ>mFldZYG4x3Nq!KX;d!uyMlrs**;fxQ9k=HSjU=~KX00u|8;_7iZfb|PL zHzrJKfJSrwJWp)TTn@9Y)AGRqMYHocnj}&-lI;{@GDpBh>p=i?w~0i$y5~G|H-x|B zv7-BNKP#%>$^tY|QLen+{_n(>-$<3{midIr;~+D~7^ zzkhBK_A{WU%y8`%D*W*Cb|VXVeeQQ_|2^n7jD{^XC``xzE&(evsAqGu9?EiszWt}p+Rp(K!DqMqnK8Qyqy!3^&I zd!y}{hn#B*Xa%Qf2M_$2Iq2~)N3KoK1VzoQhJ2dPACnkKTLE)MgD|4B z8?D76$@c~KK#->l{QFtc#x(ri+6jV+1rG9_D;0zg&;!UMcHk)Edh0;}(Q6L`p`YhD zGhAtaojRh_qNzP??Uci1_bCYOyD#*lCMLV8sNEyPeIx~IB8{MZJT#LO@8R0d3GWos z>R+q%cz?sMC~}k=+nYg46^N*3!2z*?UNd=u7|4q}Nv#)~g=vNJN!As6k9AR(0tX4eN_#p|T6(B)7l z%g|_sEat#lu+UmhwcA(dwwkKX_>@_2>6L014jcFJ+C8UDrm&C}A@9Veu7bBB`@F)J{^%5DYiN;EpQB3e8~1uZ}$=7=s=A~>TLGU%&jhR{5h23 zkNbdS+WCO}{j=0obqJCB>S=@6^Mg2d3yh-Tm1T6qD3s2Tnx~|yKAR3IqUl>iSmC8q zPGGhzNgadirV8zV8c9=AQG2t5cMFL*2mvmm zHk(!&E0ba?Yt_xFqsv}<*_yPC<1;R~qBIKh+Kz~b)6%`U^YF>AXwr-VilGmB%8x@_ z$)2I9Cpm(5Kb^q=02vm+uP0%gA5UsiPn*t>$F|y1E>d{Pl%9$(PL54>+2l)Tnngvq z;v(fnD{r1l^n`BocEn^gx-(Q2F`BKrl(z2DtP|(F+iK|H@xt%{P7I!tLeqfl6H&!P z64S32>P537qiej36Zs5k>k>akCEHSu 
zP`YljFU^j0T2{uU@dfJ`l9WizXh{lqZ}wI;ysNA5K-ml~QnqVY;zML}>*tw3O=C%) zk<{N}yA~cMZP}=c(bG!kke<>Js!)&nfKd%8mCLF?uy}TR_HGfX}uT2*?#}G2JedY+L|;gCqG!V?i^OFZ)ZC6DgaLf z?o!o3yJ)xv&S?|<*8a8d_eqLajF5Ey)ZuUPGRvEi48!cP>yi*WHFcl(o~3?NtN!BE zxXl8N+G@O*tZ!`3(b1|SwCE?5HqkHzpFex!tWjJs>shRy73yvO;gC19fszf!~fd0Mw6j(KO`LRRz>cYTx+GJBkH%!$?Xl;@A245bsh z$FWryX&<#E8Z&QCOOv0eyAzI|3B1^~nItP8A%7@MpTn)iPuN`V$S=^3wFrED1vagx^B1&-_;ptJC_mzIbVHp_W~26l z#Rk?t396b)7K#YX> zM)3;Ae$aNO%uNvvCaXV8#MR@_VQha?5BSp=?3hHN&x1egWuixXvY*=Cl`@{`Vt@w}K2G+9&&0IX0SzOJXa0)gH$)8<*fj%!ZX#dbE zoBgSkI91{R5Q{g$ZOYjB;d-EtCCojx>;>5{F@2qL(F*+B1!&Z(KXnvVhubj|@NkRa zy4B^6g%M7YLn;Ea0_>4_94TcYCxgXL3HDI3g`B?3$&S0S>?pq^k@>Sd!WCm0)t~LP zp$93-V34GnPEv$8$Ky-3v|?^ZLi8aCjW0nD7@M7GbpuAagk+^y18ajy`Pa>*Nbfd& zg4C3Mv?gLc`o`{S7%tGmh?n$yqZLY__8(B9!1PDtouAvynWE3PB|x>)l#Cj>b9i$4wht|PqU#k7ZA zR_ps3cr5S6rwdo{#^)b&=edRPS`QKFFw@m(cB$q!XFW#C-4@K>zr{!AkRFHunz*!5WaWwQrbz8=Xt-{kx|8jT%bGrk6q{d zIC;xrC*qyD?LM-doTiteLN`ug&&x>_vhgKL`=yK#5mWVPZ@SKoAav`r8D4Zcf8Tex zlQMj478Z}<1_VQPnN=1vIbw>vnbO!RiWW>Q{{!&PNK8K1C*=P}RVB)T(f?6#bFMoRERrdFht z!};@~C2O;uqth$z>F9TQ$P9kNo3`>#R{%ol&g<=#*2_C6JjCtzuSc{OKWfXX%gTKB z5p_ganCX#H|sDwE>Ysj-w&5sEIS%q)aP( z>^+Zh3kSPJnq=YNfkTqK#R8XI109IJ(V*hdOlFA}zq>un8&Y>EMKioZlqiM=qyxO6 zowTN3iyIqVgKRK6#z>!6Ztd;TZ(445$y-|HPz=`in8{Ky550XCs3M{ieQh%1(O7rZ zwnd^ZfE-4#)%fo`ksP0=8`U6`H*=Q8WOQxkaltp9~?V@cl zC*^5`gvE9K7?ILpWZRu~UC0*^i8`JEwuP)Hua<+|zsFB|leF<1gm5re@%D*%P?~ZD zzGykL&m0~{l)G>}dEaI0X?t=^C3BTs{3C2>?h&+;^jvYriSlLt?vfaJk47FkN0lTJ z8v+ZByD(pTN(g80$om7PACT&XCh>T_yEXRUeH>6roLKe1pQdnwJB97 zZ#B2hf?Se!J!Pqq16uIwQk$jI-~rOly2u9LiRwTJ%H_EHu1}H0rF>B$ib%ge^!oxQ zEGrSZa<~>PMIAln*K6U?#1Km_Dy{Cu9+aL+YO#eF$w2K+=>Ri(G+9w^pHMFOZ!T|y z2L~CO6%~-g#B^OGCWvU#W98_ci=Ja&Jl3*cuuLu{jBlsUOn4JY&Ql~^A(A8NGt+P= zb@_ue*2g4`R<|^+dWe3{JWRBFE^0h?YMYSkz&#@O$|X)VbHt$_uHRy1!8DRlv`s{a zks(fBj)x`sQodf?UDy2rmc7nJS4b?AAB~|UOcIyGLc0X_OqzG+qFskjSoj(C!|ZQ% z&H3ZW!Yogu6#Q~5718AJ+tObh)&#PBSnikBN|V6ka=wiGZc{7Ll)bR9NZ9c{Q;X%vJk)E?>TZp(tNmHI+vAT_=qHkMK+F4%Yv 
zg9opJt$w#9E5j)F2X-zESi|YKNd81#e^j6GV9d<*4G`*6*!U>z)JxVjaz`orJv2YrAeZapE4JSiM0DZ#2T32SC z39O3~rNTCPFhgsKUv|RU2?to8l);>r+>y5S2L+g&8q;@akoId>&ptJqAaAKQiGSPj z0GeE_=vf0+uFGc18xv8>7SFGiX0~;mPzrz%g4Q!}U^b272}yU_isQ+LVBsK;Tmt@j z{m#nsZtE9@cRLcMf3^gm(0c@vF@Df3gU}6U#mk9NWzVO9S|#@&-1KBtUkfc zaIedL1Wp-{f_(pZ`C#lW)Dfn4`wqwtbO&L!8TcQZRXq%?=Ph%%0ouVwflX`#An5cz zH^6l<#!{3lk|M%gQo5R`XuUxp2ZZL`Nhd>Wrux)hyiL|VYx%rp?)m0jb$tr(O<4Ee z#W#`-XpY5?v@bUwND}#htU|tL64{P!9M&8JOUfGBk}5*H4FfvFKH9{~Cbc*ly0<_r zvrfwwJ>N?(sZH1LTm*PUIdNYym+gFCre6fJR~KL%(MXW7@j06%218RfdlceNAS1ln ziN?tN4f5va2TlCvCXAxrk=nBgT+n~sz{z;p+1rgq0Pa7y#x(=XuUFm7X$9@a!cRd) z%Q*6?MxG?y!?G38(z5TX66YKwf=Y>B0=-I;i)Y6(n%%Weg%!F?{TWjX(3`m5^d0`x zHoffoFr{oA^=c~S8Z7^x4#b*zkoyf=R;4gRNKaF=O5_6qPG>nXFP{pD-A|IATDeoj z`@r5d3vQ#x&KOq=eN@o zdfQyJOZFsNtdK}D6$ z8g-3hRnQ3Misax!;y<0e!|y@Pyzyx=rCbqYaA55NK#2yVPxe6e@4kl zw51v&I_=e?XR1o^v14gB3U}HcCdjz-&Kw(RpDT9bJDK- zLp?u|?x#)x3GXNEsWG3QrwR6u*ivPtN*iYl%C4rFZP5*!!7;hzm>zE2E+Hv#dUr1>$%yd11BWUdgguKrA>Nk6z6d!nsv4iv znne8&RFT4T8oxWpD!EE^mfE(}AX_V>vP_kYPVcXYA=BGeS$i<_O~+DTHATO-gQ8=9 zN;RDrxnp}%VD|^1TA4V#?6Tfwy}$Ewb>^f}eAHD&3)){GI+K*B6O@B_==_$DTKje$ zm++7S-lIs+L-31a3gIVmPZhC@6HNHuIt>U$gMqnU)(jQ4*{4|=`aLxsYdD`PsX5wK zogQq95z&j(U=c;E77bT8Z z`2`)`6CcE{XIk1p0>>o8E?q^=U-5SteYc}EHs3y*)qUV;;nuilWn9$p={ilcT--cf z`+2mOu<9PGTV7O9ICYePJdK!fbs|L1)cu-;U2%*7i7Z%VtTaKGuT@}=6@!S**d*O! zq_C7}=m&JG%p$32NTP@cL+3J=Typm}4#=EaUz#dg*M=mzd$x|Yv1_y~mz$rrj#59? 
zk%z3QhcXr)L}SXzY4gmdthuI0?f_1xp$bElVt)v>L^qm=AQb-?V(!_I z+l*$9`7~(=o$Jv;t1vf8)fwXFKzHIb6Lr+s!uNxLZIl&t2 z2L1+YPf-r8_kvH5N|Cd*6Muo0HA?ONP>T{I_3ScM^98yb91=wN z6rApI#A<35?!FpD|9L)u@vm-!+_w-srL;aV?s$YBT*iq~{M)9$ zo)@vDn}VmumO5l}B#N!}^&g2!5(#422U8W4H%2a*1SEbYv8dVo)+&j)82OJj20gNF zv9PjD%vG6;@i*y^I!x_wAwi(L3l)Tqh+FV7*af%zQMchBD1%olzd+dL2TpB1gfuaL zpV39Dcr?c{_FwG=Z~ulrku9}LpKq{k{-$U@=FcXz+SDfBO$U2&SdD{ivZH+74B)~| z-#fyh{-dJ}0A5y$tDo)6prls{!Csr@aTMbz_7un{t~6pM-eQJFA@|R=pqE=oTJZ>^ z)ZTrQ3fX5lylw1l>$%Wj;bYOF5xc0v_gP=)iE)4V({R9BcA#=2?MnK$4dcU~Mz;4b z+}n@b$bU*rwDT2(VC9{B%8kdJW58Kf(HADvyEkpGA9#6IY*S&Y@X!PNa?VWk^=r4zRti& zG8y3}gm^ju-H)}>M`+Zc?Er5J<#T`9d7DGf{vFe{aKIBRB6wVspVk~tnwi>cveD3z z`wwbExhi`*nE!M&{0o2feH*44?r0Qri8WJ)2A&E|LoB~JNSs%z>*I*q8X=|nj!{h} zy;C=BVCjcA{uB)hiKzIX9}(o)iT-kV<#rlO>ep&_UD#4jK#yTg&WxN zRA?o6S#Y4DF_P6D>@-}hK$|_~2tLlu7@a6ZbOhYRM({py{0MjQEw8=X<+zkPc2z0M z7SNgz>__FG;zJM=A!hC}hR0&Oh-AHENg0|jj*MpG>Qv2qI?^k?hf>ed26es)x)EbjU={k1bQR^5A*Ff6eK>+w{W1mksEu5| zm#W400&%3?;N844Gm%oxHZ>uM0ivFL=BEaL9=^C;aWG|V-@km6N(|U~-djN_fAc;x zzdqBuc-CB#pOHU{X*5;>@TDy{bVR*cwYb}MprZaXowfvzP1CQ!QTmvZpAnrr(1)0= zA$PAtiLq%BaKFBqXHY;g^vciUaItD3SVykrfGO#7GfI8R=>n~>tRlaUNo+>>F)FCE z$JYCE-swGQVpwg^IA6{7QD+Ih@^*E2J?BYkmrm8u>E}MG*?@z0KFS*slWqt(09^G> zP@Y7>=o=d~I#7A3OWaP`v@)t;O2<07)g){_%WE`8c7K}=gIvX;I(I6Bh@&Z!Z)PoxsECNRNI6r}WKkDyg(m9%*zRlv{3wmhc zm#rd#89V|(vHSGo9G)1nJ0BbKN>KefhsY!-o?7Sv_1}QvLp{ z0_A(+y}$|3ox`Z&-W^sp+#q1_07whFqHU$PS4BOru z7K9`dr0yq=ynZvPSLeCx0$~aa7h?;bx9G9plf)%lB+|UTb{FN8-7guF>IviJkf$D5 z%o`hj$(GR1>}lgc%TFH}00$w#m;TpFdpL`?3D7NJZR>67H}3zfxyj)}UCPz^f&vh$ z_g@()|KIsG#nbP^ZZRHM0@MbYGj!i|S4LPs|L=hIag|E`@D{Y)rVtf=cL#B8$1-!f z08PXfs0*;SGCl!h;tKCFPVe9_mT$2{xTd<;VN6y^F&o_5JDg_9(aISt=k}a#ve@A< ztnB1Q@R;+n{TiP4x2dcVUnCRtzD%O}ya@w1i?|gB8``u%MG8Z|&$?L?P zBCAee)r^Np9*Cc}9JvH*qpqr9620*`cy^gy>;B^KbE{tS0@Mbt;n?I}KEL@d5D*D= zn{-!za$|OZNYUXly-86Ws)ak88J`t~3`;nP@>k`UN|M<(bKs(yq3WjyNpCxgZ1N&J&gu0s5j(*?b8v<_;G!vbE@oRad_U* zQ|KO-+h2uT23-`acmb(~qV#X#DwgO%wO_qjGoe-E#$1skCF2dMY#YFYM%sn9NX=fQ 
ze`Ezrg#chgyg>*YZ>o98^GG)O2PBs(`Q9khVd?q{PY|*2dNZ;Rj!&!4ERh&{>MHW| z=U%IIs0g~SufJWF-KF+3pa=Ul+%M0Sa#y3fvb$>1g_32AwIk3F zhd%WNi@72T0FxIN>E<}{_pD}_kXTigBo{7M(u}cgBA;q~_MTW-nms9~w>-+&)eOsZ zjpyra!0X{vxs{FENIJVu5#eo$FAtRV(=MftPFmVhS)(`0Y(Gb+e*LSgzKSws6~wR* zwS|3F4+!Sbelo5iSDF7gwtN&5`j#6W#P`vvfJ$>@uKW82xK0&Ux|=LHEN0k21E+nL zTmb38upH%n?T(U?m^viUhh^^fV$6#>f`3($^^CVfx^+1J+9o^VsED_L0+92Nhd6Gd2^d9oA-pD6>V5LY$prG=y1tjkEFW2_wpn zaRN1@{5~94d^Y0Z&$8C@dkxz9X(rfTYeL9pL*7`7CStuYqM>w!xA8-I!JTcRW<#01 zasWq=Mu>>d%-R{-!aR{5Ln9V9XdKljW(pvaCMGmwzB5q&0KPB^SivVacMZQ?tN*gN zA02-+v|&lr6lQS@I<525YzK%xX_k2^Nm*;lV@zG)+=LRIu5-1|`hfk$K(tucm?om6 zRh>AR zS`cG9?7>9Ud03hNUy^ulnk`>sAQ>svepE^rl|3GKhwf77HD=EPd8iK;nL0zFVIV-I zptGh7xf1^ZAp;1GHO<*S+UAnvty&!-ACwyr$F$&F<_dsdX>gYi1_WBu$OznkS$Z#= z3iuT_kq(4Y4g7mJ74RYs@Yhoil{HHCP~DLaXF!_t%Up%dIGWaKoMt^C6}cLXRw*)4 zUN8XiJJMEtw*b{%-R)BEFH9r@T`A=XItCGys24PGtpzW8Ob-e0t&4^%G?Qc{NqI|2 z`>!5r{CM;Ol7p_DJ3#RTj_rxyEs>O0wit zOG&zH@8X*-*>%VfWExt6UnHsd=a6UTfrC0bcgfJ)_Qra0I1K6(`6hfbW^PyazpV44 zF!gFbqr8C7g4Nv?KI5rkUKV!=EqSh?Ob#7mV^>_9{RE)@2cTvKr%2s8jeM^FIT_xZ zb}@WrshSoYYz_crT%SFubzxuiV{NP&r}wqCsvboz+OS);b_Rk8|E9$AK}6cOzXUxZ zp$f`yHQnb}iQj!3MQqqjME*Lp;oF8U@_~;ic3Z9PAxCI@>H*0qub1Msp^Z}g+h7+g zyn`>h01-;x6jN+KL2|p*uP4VtEojn8dCL{G z=H_nH<|$%HlnR+b_R?J7&i);eg^GhFV*yyZ0%(js&Mg6Adak{J^V>(_LyNj(mni3t zet}v(E?s3Xe=#vxeckE(WKMi3;=3}#qK3car22ipTuiIpd#$1-5J&_cM@by!| zSP_1*LJB8VjhaYa87f!>D_}DvIPNs(!;FsyRiZal^QL6I>Dq95zMMa4#Km;$^|=8$ zGkEJ=@lt8gNRg5hBa^B7#TS{UzlG<641>lWvTRi;%c0)1lz6)|AtfnFa4?o{UqukY zX_|2_G=y&xV_$-bzOdf32lww#LRq}R+wmAU`MSr)Ks^>Fpf#d0#h5C<85C#Y?}JaP zGbS0L?{cX`x6`iP;F9{$23mL6>+*}Wxf(s!r#{K;pm3>&k8Va(7Kfu@IbX)CNQ*2a zZz8^G=!YURUu2?EpGcM@0RBU<1EnqqOt!y+;F4K+W#0YlEmXDt zA>qw{`>GP4I4v5Pe{K~zL}q$gxgnR9NNPY9DJUqGLmA~Bu%~Jt+phO@<98o|KyC=H zK42;;u$VjnyVaso*GPo&?Mdu~o@I}wE|X$D*8i%wT@k8ErBb{mR&K(wV%w64LrU zKa5>5@wP|B-bZP40&rvgPV?KTa7Es-vC3B*2)H4c zgMc|fCol%ia(j9+ybtKVM1QfkUW;dk7>Ucu4DCzl5w6gz8?)EN$4J}Ppohc)X8E1{ zY2L_xfpK>~PXd3SjZbjNjQ-lIV?Y@fPJ;8@&&+*h>GL(niXK2yP}IDxGQs}}ds86Z 
zy8IH;2~|)B&Yw!eG~M~Vm(EBUrybVS(`nu>-_nWNnlA z;vcT@)Q6|XjFiE>Nf%@xek_Nc-Lu{<$AStWoa_B!dHG*jf=P8pEcg)C4x=?{plE~f zeWpp|5cd=^d%)t$Xh>~Lu}CZ`t{WKKsYdj79=zmaC!MBd56!;iet)NridEUP_f*s~By7=~V#)K(KKITp7aB{c7$rzo$G!fsYM0DAXFV$+Br z*Ey*_8X*(Y=Ps9(mu*s19mpEy!|y{Pk3rBK+Ke%y0m~u>rq6@+ z#yVX!MRiOvm1^GsE^cB2b6ZfwU;a}eo%!yp*+BS=Svm}0aW(+vl$_A8R>*Yo```s6j z_MQUs?9bf9NjZlvY8fD?8>pTCE6knW6}^8EQW(h!jz-}G>vR#fI6i-Z>a`N5g&@D7 zzoQ-xc74g#5%ql=JA0dX-^e>En%@p*5nW-w(17a0jd_({jO{(!3KH z{O_7;j~R5t_=bCYhP>uJMuqFJ>6K3q_=48{F$dhQ?M)g0cdf7(RjbL&9L%@Q*in<) z@V@Z>1*%CWt~zL}f6$br94afH-DmSY)`~Or{qkCYl977T352w1|zU`kakom4PjQtUhI}eW~$kzj>$F~1VKfXgS-^crkJHyMR_}2X#&mZ zX5e2SA(j%0s&-mCwvdEgJ?%Y+T>h$#=B%Ys_yG&U4^3s6RjfjmSkWf0@R%e#|Cg0f zDW*d?)RzTp&xuJ;-TYYDgWvR?ESFy($GmU8!SI3h1A+8XP@GbX>;&3{tWa_v^T@G$ zOkX77@V)7$|MLH1M+998Q*Be?6cS=U4chdQkU@&q6mA1 zHr5sa-NRu^%0%D*kMfATcSlMB2ucT@yuHMi!z~k+n z=Idr!8#DHicEkPju#otQi~fSmvFfMb0bX8_OJOZxV;7Db4&pk6=?T#BQxNpm0r~*H zw7$$-M@#phgwxoi2)+-gWLga$(MoWh-!#&m0`z#iN;la`3*X1Hgl4&+d)~rRBdu#m z0W8$>J`5b3BBaO2hLm?O>YnBSKIpY(x4M8wdQR(O{p`386vjH0W`*C(1IM-uxn86M~ASBBI~v2Y}l5RTm?DZX74%gwvAh zap@K3GBkq1I}WY1u@bi1b?zw3&bQzop;M_(c_V{}mijC#yI6|ek(5lJ!PEneWajsX z7uKq1J=YXhXYItZbU1uSS25C1GdPJMR_WaQd6)cBlIc|d?@hyb{ zOX{B}>5*YsD}&+qxYKv+fq+zkoAve3-jDs_(uOothv2J!nQX&bGWJD2zchPD z+?zymID$LCfiJ2=vvJWS38_EQ09f3rRHxQm#G{AiAd1IUuc};i*h}(5SjibQd#f~; zqFD#UV}e{_;!rV1GTo6exe^H$^u!&1MsQBD3@^3;mYP`XphLcFL$WuQ%hqxZ7%X3` zqx!e;l1t~|&b_(mf}|wnUwk^zH#?!~I*(mGk9f&pLz^hpO^MhcwUF)yHqq#7=oo@f-O$t?&HGaAL)FqjIrI+Y_I5F7)*9j`S$_dZ-!oiL$OOq%fkWh{)Q z$35*$R!ooS-cmkJ)ISfZaD2DbRZXL0(MvAGF3CczEbbb0xQLowtMLV2_8IP&MOIw= zEFV>A8)3Q9%nv29BFk!jGInXaU`e-$U~2%fy%+2(RVmf=Y))Qm)g4V7h+kR(O^go} ztB8*kn+=?Z^H`qd^Nuj6eVZ<@edSYw)CPnFTwtqgTDpEA+z73GpmA9F>~!(SedF6g zwvXXPs*T(!G!V*p_XO0XX7Z)|D${cSA~<48SGhQU4&+)ueDkQHD@Y)e%_p31>BxLf zNubyf(8GT?elg;YT|G967~L&YZ&P||ZCV{d%F&}0AbC0)Z2kE=xYwhL z6FC|iBUa9gxrj>S8~kJ{Z$o?`)~lkc~ z*aM~%@%lO>26o8@UQp~{W!z(!b%0t6ipO`BkWvLly}-rnCSyS52^kGI zc{|cS;~4n>VSC+zXFYfpcLVlT)`^#(>rge+4JpZv9!uQzP7m}TGtq0 
zD65a!qrO`B25)fBD_kbK34UMVKp1GVatl3*k?3(CavF^+c_7;a?59gt!m>;qZ<(v&EmoG_oJF<#cwNn zzq`~r(W2@hnBo5Lw_?Jj`V27}(`CQLiQs>1* zGRu~Dc#SSiAyRknN-s>^e<(D>HR^b5oMu`2XWaoIzo-NDx3D^cR7q3xn6Ja6uzqNtewSLmS zqWm~-ZGx^uRHXhxs-CV}d0ZBo;!SP zTs>cCP#ycKt%xZz8UKN@Ga#yKA?3|m1BGHPn~KRWYB|;@OVw5pb-;WD{-XC31D{deS_- zGe{p&x<$=?n159^H^t7%CNim4%JD^G0~tzHB^o}{3zIu7zcxQcjimmVl9H1aj^aew z9?L3AUzXZqV%m4}-NJR*aH|jp6=f4BTrkQ;NPod7Xr9y!%?bGA!v!}Ss zV*fi^;1a_TQ*{hs?VC>{38G*d$iWDP=Of9fj+kjklez%*{GBgPWF zDfOrq^b)*wsG;J9$61`!C`!{+)_NTx&*>)}$zsbD80lY3nY|>K8vt;3@1y718%N>b@m zx>JS@rNN;aNtJFHX%s~X>F$t5x-Pi_;)-QHRM! zuB*#*vU>FMjMAg&JiF%YUS`2MU$;HZ2$Wreh;W8o6Kkd`Hj4prb}F*`5YbqXjAE35 zjBN5?|7g*`fSXDsMcj=Zh0u8gSn0|jEsz!)v>fa-|uN&!#t~r*_XC|Yu+Q-8GfILx3|5j+>NxtyXLim`7~o;gK_I#EUD%A z(tE|lYJ1SFgCEuVfJ=isu`aRB%EYSr0jqa7iB*qN`zu68QNV%F{Pgs?1%XKB)>IBG zR|8hL>5&k|1f%>sqFofyrk51Hr%{+S^)*>OFJ<%zR+ZC;C%@UqCbKF*T#7xU%AtCN zjXBpS7I2uhe9k+*epPqb!vW*_h7VH-3ee9It(OW86$3g!12N4jH*NG_0mhvDQBh+f zY4az@XzzdMVtI0rap&NH{+SeQ4LK6({I`ZeSUI)#3*Il+Gs-gV{ewwMoC{__>^9%QlD!Vy)RPT$&u%cxWxwRW^YC12OPqtLPYr!52wtB^`;Ml@5P4zA44p)LfAr z8ht3CPS$tj7z((t0X>K$76=6_q=RgM)_R#krN)bwW(pKc>z=rs7+0<@SEp`6rgcX5 z7pUG^Clmq8;)0Slb%i-FO`D_f`~rI@3zHQ)f2h3#2WJ=momnw@&vCRiekyfcSCCNP zo3C41b#31Uqyos8w&XAhJ^kQ3yG*_5VdygQyma%;A5rx-mxf8YM<8%B+Tp$cWEo0-V>2=+TVMK;;@clxaER;)>4Xm`FRD6i?y z0ocwiGXMjQQsDAps_N+6S~8KfAXfQ0AsZ$odv51mLUmBaq|VmL-N~i;fcBrnRM;QI zt!8ct-JZmZ{68-jz9n}!ai*PJ+0f>Qtf)hlLA58>kCTgM@XzCdBe@z;Kdu)Ewn;EO zGNCP~+y6YUKd|10{1Y?T0fQj@S`G#ayl_&E=V6#cAbTLcam;q0oc1NfrEcnAH}|(*V^Qb zesgaQpRxhj+ju9)77!?!g7`xAMDZ{RVts91m#_3wZB60VGCnB98>jKQX(o}yqcYo? 
zF4|i@AW&|N8-c)TVNse==7R(ak9!lDGo5}w!du~-xQ*vwZKA28r2boLskrZDZLHrv z96Ku@0a)D@i7ahGq>l~;FpTMW@;4k+S)!^${1`bZqDpTn^>VsImAvrOBVb?V=yNek z<~h`=$c~m2c`|x86K`K+0=IfFU*?&rYX>c8du5qYX^Tm!ys9fjY|L&ylf~^xh>Xln ztA#AygUiFQ;B*e)g!rWIn2&=7>S{=N9vxZOSVr5N&r=KV4x8`XQ+!>_?u3bklz)mw0T z9ozTYpS*9-s_~W8ryCh64^fB_U!E6Xq)2Em#*PhjGd)q_+9Wxi%Ut6-4$22fC&90a zVfpmV5z$?8ZxlNau@L4ow}O?c%X=L)rQs9N^iw ziOBwl_9@wP=%aYm%Y=KDiPb!6oQoDCbhp7m)P$e?RC4vjD-ZcNDw8})f4$H+L>%cq zKD+s=XPK_!3Ywm#a0R<#der+dWwWN4D*K6Ht20GL+qfJQR88T~;G1tar~$}W*_X{%uh$|l0Bq3?^+nCIEbgu}mvAH{ow9l}9N zIFAf(xKa(pI$=X}od*-JCx!l#0@4CJPnZ~c(R@2usGwF0nNRw(mXItNVAp048i)UI zwEfF=Hpc2a5nq1xv$&IL$H8CDq1EWLy~?M4LTwvH#@onv&R+PBPXq{uGrwkJ_N-bH zrVgI91Mb+(2)aXXFC23gdEU}g%g&{|AhcZHqHpyM{O>iedliKyj)&~{c$Rp2vN?a@ zfy?R2?5V21<!~yv}Z{Rq3sgJ*2W<^g*hz)6bk-2%pMV#xsjf9@#(^N@;rT@P8 zwZ}|pr5eIMNs=wCS=@L(mMJt=gBeS(nHYBjj67HMTXhg(FCBC)_l?GfxeqR(3Y^S@ zy1qojz1Tqsv}p$Fw#t-`IR?J=96}nTK5=XP(Iep7>u`8bfu5^4tKMWP=n^EY`e^IG&UcU$ zxlVqtbKJTOT?RtV*zU;qS$K1&_bny@i%z$Pxhs8*=qQpktLU2~6oidm7AuZ^qS#q@ zk*bcR#^0Xl zcX;r(`S_-GGm`c`Z4#2iZW${$^3&XY`9%^bnm5K5$F07K%{p4=5)hcc+^qH#&uPd`nfXf8GboSk?&BiSr2w8(>%%T&n! 
zsvcuE&nyb_4ZJAz)Y#GUS@2*KRH5dHg#whSD}0T+?-kOY2Ze9Ido1dP{4qbqZ@xiV ztSZEhN5xDfm_aNty?i|vD?B{a(hTs7%3%I~1%jaxceL~)bk5K>)6Dm_UFw%#z3x%$ zB2-z9KI|%M*wxsGutNy?VI83dp2QsCdNijsvI}4C*v}6z3cB?@)m}D@p0$m93q5|K zd=wRBoR)SH`jqy!^#XzLJBS4R?^aE5MsugS`iYR>uc*fK6Zx4#835fLl!~Ay@F75$ z8af2>{F>2ItFQwkFezqLgsg2Hrp$3g`t+3vA?W69Ikob(9J`!)B&m$EeLtRVGC#Y0 ziY9brQIbv>@w1H|UgSvpQ_0wQsT;&FRk?S#Ic>6(L)IFsgTkd^yHJnzT6_}bk)@M5A{Az1*uQTGu@^tP_e#fa|^eZQAyYcb2)hz-#}UCsn$g|(X>XTFjSJk+1O(V+; zVzzFeD=O$?7^$1c?Zlz~nG-ME5xXaS$lmmU@{r(pgKK zzi9i)WNiKATJ=vQ*@O|38#=6Y_yKfcQZiZB@d7QwR%+KD%z+)UVnR4!sfTa?1a7FvnSr zH3-;m$74ArSUXhu4h}`#y>&N?`fv;P&;J;*gYPx|p8VoYMF(aP92351vFqI&Jq^m|o_Hj<`NRJr zE|Tp2KOV)XxTSfDgA5v0Q53hPE;P6h zxKJlELOk&KoAzrMeeo7+d2^b5SIS{rAa5XJv*4UDH%NKE^;OUlyafCM^%5fK*ct+w z>QHoNup1wu#z-k7&XfrJn7wBl+Dpv);_yF*GiL=p0U;AY4DyZI+eTUB-tRO3RHSII zmIYbK0@HU{lxWZg3IuYH_rf+tohv`1_vi$@FI)U5D_yxNn@ ze=ax@{kgy$VJinjqp;AnSa}3-WUn?VfNbt|8Zikg{1naZ8 z=Z|GKj6k`F4lq|PF@?yG^`|y+fh|AUw>~IjnOYupC1n$Ec8Xxfwlw%L5U@JPS}ID1 zpM8EVL5scl?@C#88%rD`eJ98nO3JT2z4MgB_gO2olr&vp7h8(H=bFlhV+ps3bv^S` zMqnw_I%?yjGx_RJU~432)sLOJ``-Dkz+RqRicRwFiN8c>ZUM8t{!CR6%hrnnu#3;X z>N4~RFy`W%?dVr!uMFi2YNKwYk(Qd+X#<|lZk~<~gND}kUmHvvK_o?ZyXXaJ?>A`m zHI^OZ`^`VcfKjpTKED&6F{@RQ$`B}(lw_P98V8!d{C8ayavK>IpmX{q*|(+Re!GTr zJpsk-2#?(GcY+$C!~8iZ0p=>vxN%RRgAF|^;=3$?S6lgNuWKZzWYj8p@ikJDAL{x$ zxH4sFsrImxN5^LxzzAfWitGF-qdjuuZS^;ds`foWvWs{3M>mIe??zrC$GO#A+_BYY z8H2UYPs5xb9maG{e*lEI5YXgzz`=FJNh+}E&zRr(g{31uT%(P9q1nd1^6)^$fhSi{ zYDF;T)~X`Ikj6=~eHS__qQ;qsF_F|aqODFW{PFfzw7@i{tBC#hts>RMO|NiL-L@>Z zh7!V693`1x=I>*t+JWrEKeKSp z0z)PL9ojsI#m0_;a=UDEm)U6ttXp@rmC+a~RG7K~!kDXR$7Ib_2PN|Da0zI z59&iGh%yB^b zgJCV-3^%c?%Xcy&K7+g>GZiEw_5ZHKDJXleWx*Xv;xl&f<^@2e>uxLdjq1og=o| z4qY+ViLxnQ+X?3TArGr$?*dg#UO!$hp58mp4Q}Q9f%@@hkzj)a+e5DMf11s@6Ql_R zAvr1oF_BTSlEzNIZ*?Ur7U5*j?V=G;?@AfxZ$25#AXNHmhjA4mUFw|__!6llK?BkO z!k}YavbX3A@A38{u`Ykvhbr&iVTW z7$3`{Mr#YEQr6WOq?neFPjx8giv>9@ug!xtW{Jrkq>}K5M{Y-npZPGAgTQiojiX_| zvN{O@l~sVH*^Y~0v%ofHAnFOuS{ntu61MA<0E^-{tn?$h6A)u61ML29SB{ 
zg{)Lsgt6BXA)kU&K@oE1Hy#Cux~MYMFCe^TT;I z8bY+@FApN$d;W^yId?_}h%|m+&pq<9i)?a6SgP{ULfC7?u)LruMa;%60+W;@!Uuz0 zcStR;t@JkTuD{PxYB`1mYuUb-l9mP=Rk3P`Hi;Yyj1w8Z8HpT|qv3g*VM-ks@Twc@ z`bRg>idoHpv#M%s^E&{jK$wmr#n+V{=tL21h0dGdo^{Dsj-rKXGUh}ZFj_5B8$$f~ ziVoAAjWx_9(Vz5XDt|fOn4Qp;j9-)OwLhq|l*2wUw`22_>2__;#XjpXUUg@Y5RVYI zr`0%z@Xy&fQ>+Q25O?|>5m4>BqEg=&l6~AhM&68KRM~5rG*u!&z!jsIvHWP+G!8#I z#2`$zSeAL2e^$>Zdf!u_qW^)GEn3R_?tML1cfJ=Nji5IrIUQ|JQ`8yfkG)zSw;YUG z>L#(Jbg0l~ERJKGQb<3lsM>`lC9O~dV{l!d-Nn(Kacz{uVFz58N~zX|WrwFkZ|a-M z%>`DfOl>b@DV$<|jJUQ*FSPsxN9;9^i$;1SUs#l`r1?tADQ4Ko`&UwxcB-LOaw-gr z(I*};q0C7zUONF}H<75E(Gd3td4SxBK?6>Ti-VQim^X$c^l`r2vaFy6MP=8$m?IJt z|4nsrT{nR)z(2K@b}7*}h(#-oI15PKl3&%Y7dJt0(qPJU**oCIA5B$4Z55%YESLR6 zG=O3I@SCOtox>PM#r1hRr&vAn;-7@ljsL*~$8hFUvSNQ4VD!Tf50CiN)X;n;u%jKK zb0NdfB<$~j=td>v?hDw=S#NCmCG+uz&(vN`@3#6q?{#)rMteRJXZT6^xz3d{;n+wq zctsg^p)ub5rh~C6j_K`56@Ix z8_ci=o}7T!wVO)%{9+{!Lc)Djjvu(LIh9VuvZXypJxl%)LJ5%7z$Ga&Njho#=U{zl zenkeV9<0z$*@NjH;$)z7#$d7aNw(=6d1;#GI`R|*|7Uj&&u-}4AGRUS#*(r$h!INj6Y64 zR5|`SHn-@V{4vq3ltO7@Uj`|Um^TI}C)hEs$)4NXDFdQ?vBxigfN`nKd413l%xjCN zEREybpj@wmdVdlcvvv{}Rqz6<0PTn2XS@4YI0D`(`Gk+3etcvc^AqP+_cCt>N(W-R zMYQ}?l=Jndw~3QixmepTxV=}i8_yq~znIk>hUeh|mFxfO_(D~*hlLg8>lMfP_*i`t zYMuG-@HEV7j+jp|?)UYQZfJXv&CA}Qh9f`gPIJ0)(!lEP6pec%y$HR(=uq3nSnD9? 
zW{G0K8)jI+)mrd242x68q^PS~R4Aiq)~Mo2gmCtM>xT{s^?FlXo1%fk(t% z7~7)gnSVKyPP>B zv3gO|R})TKEW>y*Mr>U|%Q`s9I5iuZ62An%>Do|Q5b&UY4?OlN2x%Id0mCz$0Z1Np zHg9X?BlO$B{%0JR4KK$9!V-w@Pv%*8Pc~)X7@_2>*Cg7h5(O0HQ5%n6nkLiOg>>U?xTqze%3CgZ4QSa+QI;&>*^AE2lip}o68 zH7kmZk)lGU(6h=KB%C8z6GhY!Dx+^=d2s96PaeOa-1g5b1-lzRC2~FJD zzIMj%U|HK&+&pRy#(19HZV7K8$_%zdfP6}cqnY^IYlZ!Gw?C({;OVK$lD;#J%hc{0 zG|R2mXm8Dpwn^BfXDoQmr|P-jaRY=f#c*?XjS}uDA$mV7Sjxbahl1Z|nfbo@aBu)~ zQ0&LBWBg*fU9~jV2@k*5N$d|wyBg8n)L};=rz4$+);?5b?rrfO>t5?;&UAdD@1T0u z(`cICdnPc1@<8k_kR)k2Iuub+K7&4iw^*Na#=ud?sft`-f>3elp;F!bUS1b`qifpPWAst#LzpeWy)Iwl{~0H zwx74cyt~#uHwGF_Z0MR7G1SU53Gm$gVC*$ zLqF57S`n(rl05;MPM7I8pk%qRvycW$0#;M6^etj?!WE8a+LkbLGh?iruAEU8kmI+7 zrAJ}o!iW7f&SS)Rmhc9mP9IR&ea2x%;v*Q*XXy`uZ!2o|fPMdVy(g7C~9alBQon{*!Jf zMwZ5`LXhk6t%>lr0>z&UoKgjhB}pf*V8jyY^fpm>10SEpVu`X>t+wN?p=Jdv7TUP2 z+4!~DAmeX|0fCF4@R`dkKd+@HN2+{0$(3>@=KBN>01D7$VctKwNRs>$x5L7{{-kHw zh*?W@Wgpo?E6qu0cATt|Qn&>HyTff82(&mzUe@D+qge<4-=rEHs<$j*eSYhgA*XNT zxK@$~vCOZCFfSseZY&XI`6^X`$+xb+lx$rTK10N~4WGJ1^j+deUz7!7lf5x2W^gWl z`H>C{_Dns>Bh5-F(7my1wmhu+ zQ}Ds`DL{qqPm+$pWe#@g!K~C57+xGaz!7pz=(Wf${BQ6>Ek{OO%X*s$TK6Y*XRi$% z3Nd2_2eSGaOfmPf1VcXv5Bvht`dpc8)V8wMhETt*!R$8N=X-52CG`$JwzfqiBGlYH z?F&Z}CN_{{`(K2(08VU0x>&B9u$=t-vz(Eo!YuUbMfM@Q=1wHE7VqZs!;(ssUVTQ$ z34@(eC8iixRPTc9XL`%PlNhO-X=W7+zoe)Iud;K6G$%}1#Ir8^F&66(#T#NJ7TxHX z>D(^RT%(+pqY5FNmHNTtzET`_QS~4bCl*B80O8NFbD3JLW06{R@OtSuyrsJup-cvv zHr2p)%*=8cBw^<$<>L`kI0BdcJHj*(4o;80S++bZ{t^(DE_5d_(X^yiyESuUZZ$XF zlY@MxuB$A9ZCGr_QHNM3~qq8?3YU%gNoP2aT!0t)LP5~;?n}@6)RFS zuOBilWMqRYACj;jqz|E;(YE|ZV@OPXbEM0Ekjr{qyZOP`o0wQOF36B?w5KT4;m{T6 z?1o5lz4W;c3LRob-I~AL7{@kfXmt*bYAUv0!cZP9@CIphXn?R(+YhtJs`^wd|G3M8aPnkDrKh(C;CL zziR5?(2c0Qt@iTKk2yJgqVVY;i)Vz64dLg1ff`Y={#BgVC9hx@R0XIM0Z)w+8U7j7 zgFXjb+k@=A0GryhwZ!XnAdtDjv5RU~DIfU?y~KwsH#t~hC+JFqhn?!_AxtE z3N3ZpALRtR|9`e($c{lZPdq9H9k~B)8A0UXQz8jMnOGE<{hq$-yX;}0p$n`~!Ff9V zt^@X3z=F>R-tb);=~SEuWEq5oWMKF4L+-MnPavuerd(pkiPI{7H%MzYX3v3ji?J}x zztI!bBm;HszDX)^_@+>YI4feQ&}VW81UmR!lIv<#H=NqqQG?O*%~0`6CoEgUKgoWu4z0r7n 
z?Ov_#&1w1ajzQKY)Eq;pa0gm39}u|2Pki?!cEQ5D^5bgT!0>aVZKl3o4`y-zs{w2}7h<|Bx6<5`b0sPF? zlO?;H;$HhthkmS1pwq|uH039+yZ(IoJo5oz-MEr>AytmJ|N7VnGzjAV=OlB zC(cQ+UbAxTM3Kv;glhdd$1Bg1QBk=?puTO@gG3+z_VF_yPZwEE>O0pFWMc@Kkyf3z4Z<^8f5e} z;+Bby5i;Y#?&zpDv6wJk875dfd%RbF_F~n?;9p0~P|&H;rei^0qlhzz9N5PyAbl~< zc*-*dTYqP#AO5VbF|x?XyPxK_hfN^L1mee2dD0R#4oJ4%u_B=J$9j9@Hk&lGUh_%B z{{`CS)|50zg`?sh@R;CABOUUs`n;+b!|r)b7_Mz{pBW|*^jsk@)o!2rTrNu(0cY7n zA)6T``vGSTDlD+JGaTjaW6ZcaBQ>E}Uap$-!6+b*B-C_uGaylGF{cYjUNNEa6>P z2tpaP)Nm6z*PB6OV7cJtF;m`$RqZq}{XzmQFTuT$%v6m4*9Qqhxy)#(o0TeHkfZr) zoa-~>#{da_w92tZHHI}%AMAeJhYi~o1*rqrB{Jx@!@PLtw>^s(x=HQYn;1EE8o%z) zja%MxH*q*GzsRp?M#*1?f7y_0$m+${=7{~07aaM*M|iAbmGOevO^}(s;_-pW^I@Bg zdWTU}><=zCSHPl(F~8CrR7=vw+9WozJzog@dgN~U^U!ufl$GLhE;xb#I;`ifL%pOZ z7w2-~xXSAXIiZq3WkDd?uSW02^yf@h{au#xFyalLvarqupPPzr8-*TmA4HHP&y`q; z970Z|#gVIQq)h@J4EmhcEsp$#%}4`Mlygm4pz$}WlR}M{r5~?OEN5bOG?Co(o-kgf z-24xcewoh`4uMrzt7!F4NR$s|1@E(9^U$@Qb)=NG>6}su1@6Kx{W|T_<4;K#8#JPs zMm`4T@G&K951cM^kSDZ1W!&IoMXwDBXJPK_WTHt05mmS(mmM7u-~jdic0#Aju&o5f8CrPts5iUCO~r${b84* z>9gH;GJY*wtfTJK9mSvP{(~TD%E_XGDlaHeyOOG@Vvy`zX|+{7Gs5t*eVz49;}z;v zB&Ywi@P5b7RRGtLH!ZwCrwu|cb{i$~BYgUxhFt_DTcXWkP1Yh>Uha6OYhT0r*!U!P z=@UX9-)Z4cEpYrCxZd~RiYin{tZulaCp}Cg8s;i$riG?5-y&&l+2BsNu-tw9YsK!+ zK2G6Pjvy4D*}wHMqM6qUtMtFR)I4T?6T=a*?KMsiX`-^4n6EJxmhN8H?vJRfF2buH zy$tc=(L8CGa5jCmt{y>Aqs21EFS1Q`r**&caeKSfL>w(RW~AT+>3k55c?W5j=%GU# zWu-uV%nS40j;Hu1hzsh%r=dO-1e|_GFM^F?WaR(ybEeT5IGTH1#@Ro&Q^%z=p=HwW>Wq+@pNBW>D4&Q+x|p z6op5%BLpyAG!(@-$x}&>H^CP2P30M*BX56vdSO45!lw@vsDdmEiDIQiD;WAY*9LJU zeNtT95TbI5V%3~|Oi%xCmw)5d`fz%Av(6r=9>jjvXk>2bQIep0sg2`aIpI2zR)oY& z!$!Rp6+;_$+2nkh8&Cdcdu!c2>F$L_xhGX!J!b)qmt6(Y*HGB&8g(t{vx;4eKD?{~ z?FWaFwI4xxRd*bz*-fT1@q6?&HMf3^^N0@|d=MWLI|5UJdfQ%p4@_IS;{^XCBSZ1# zdMj4kXp#8{ovD1rCQctny1@Bd?-p+_Z($G%evVMcF@CCH=wLtK+GZ*TA4(i{lp6SO zD!Pf;?A4}$At5j0n^0zn@~yi!u;i;CeEXy@Yfz*Q2!v*3U;7T#VnJ2wpqJm!uJq~I3Uq&VOKueP1FJ6)vek@VqQZ!_j6>o$X1v&Q$uvsSH zSuT;Z$;z8N0y63V52z-LH;QFI&OTdJgeO*U-cM0fSwvHtZtAVs0T!u)|cdlPv#IskX<-$x#WB>4{-w#4&iqJ3| 
zhX9?3<5b0aJ%{wekbNItNsDF4(>B$;H+80$pIE3E-^6Sjk_Ify9x*(q6V0O#992rW z-tZ3MIvZCF<>@j0xthKIV3`U91iHm#Cn3kBsKOF)J3nBNLTGG6C)k#Lm|lYQ`sTk1 zhCv7p1zp;LB_O4!0?(kHSR16E>^g%o&NOwGR@?s3m!pUFlj+3O=c@#joRs$>rAE zT7LyY%VLmVGZCW&mm0c^zABx&Cfw2C|FCS&K2=Ac_=`v1#e?g!&omd56m4wzqu#(M zeoAU*{I`S?mEui9HD_>A587hIOX9U|3|Hf3r3VK(GHo()VY^mB6w3@^^$0dLS-mnq zU@QY&e9a!h%J8>YtFVg4(lNF|$*ydQ6+>~GG&^-@(@3d7P160_YcBGOY^EH$XG7Jn3)Om=*`!?{3Gl+7<vJk>SglYs}p~ac#{%gp&nQ2AJ;uWWF$Heyl1KiraFZZ@S4OPSt zOJF)BQDSi=D}RnNGqD9$@4*R&M3zWBjMyP-*{HhI!{SU;Gk&Eb`JbXcDVjaR%MVzN zJq(da>i#H1Kr(e)-0+ZN^1rKaHRr>GL%2>kF^PS@bZu64ioZo$NwlINA5%Qjsz>LF z2G%iB1h3{gjDQ0|Jh$)>4r^&XTZwRA{9Qa)&fH)@8)sDD^39=v#&f0WXAe2<|5pV- z@$QO0$UJLsrKXzc@zZtX&%Wr&u0KhlA86CrIEiR~@$P7t{3P;j1yKV_DJs6R^mE%< zfdl0ya6R;Gi-LpbG+Cd;FT}Mj$al+6dhTHmo6rs7&>fAmcJD>;O&kum&4X)A>@_$i zNeG!>OE8Q+u0v%m-(u$-j|I%ufwMTwd|E|b?li5n34Rw`DTMpZP2tPRKvp*}`{vO< z&wJa+Y-(!eK^ZL-IakEZKFq*;ruP07P6*2d(gtQI6j~;y^7g%Jz^klq$MP?bS6#I5Yj|ugEf`7nAxZid z2!#f{(Bb_8EqWY4cBpXOWHVdB+=`06I79`x!6b>H#T z63-i;&Ku?ML!>P{ z(VhQ60EDZtQ#g`J3T;;rtEc)DE61S20$1nRR9zB3sXWn|{#-05yF z-&M-n41#-T@CL$5w}E&KR13HNlT$QsDON!eFOR`mVaG=m&*d;$%hV`-bmH|1qA&u0 zfx(lYK~|n7qdvL|Gs6cNhvK*tBszYY?WKrhG|3$81jq)}LG}D*myqz! zaqLx2M`RfUG1_&e;z+$m{)uVen76XN&F8nL|gcpyVeO*vC+5?Y|L~mFSSHCeN%o{tuNp*XNt%?s?fXSE?x2oW}wn{e*tO2B% zEKccYyH@{j#uA9Cxj{pDq-5>d2^)zk22w2B_nbwvnNIp(Bddh04z~v=Z}>i^?hVm6 z;$=xaK|iba$3HdYHva-iefl3Q;QSUoQW2|?3f(VU1z-)`zV<76{?{3)&~(LhNT7#avQ2dsQzw>tE}DWT6NZ6A{+?%S(JlSA#}kc- z^@}Kpmhi4Ki<>3+ieB5J2vduf4}H-8RAQInRAS611FX!)rYl;dwp-?XyD#_tmwt)H zk4No;AmXk)onn>h>?-05SkglD$CVF%fgT^xi2RSsk86zTI@!Dqiq*A#Vt&44LE5?| z#Ko$JL)@*;OtR)m^m`l4#ti$$0}->O^fw=h!1Ry8 zpP`R1;;>eUq~*G4UYDb|cD+tKXo7^?R*HgJgn*Qd;{wg)ia)TZJpu=acJn$nqYJ2F ze}5@mH(|+kHdbAD9=A)t`jO^BCIo>WGI1b(58_;RmoNq@-_ZV^*7E0+W<0^xYu1*! 
zwuU*&eW^?o!uL20lQgUsJmZ~eU!i_dQcQZj&DYm}N+aNKD}Nm0XmAX7yxJ{Wn`%@- zU2_lYGSm7d3U<8!V)MQK?Jd390smF~g|>sF8=;7!wDRw+M@%ZEO16Pr_~3mfwOkTC zzxDoLU_Bbk{Qct_gg>h*pU~BIquq1GzJs20;%|-&2mToh1b!`YN48syhyV+_%OFCx z>x@%|#$189l$BO|VM1hUJm0A>D1WqkP@OikbWXZIc@K0LA3+~funAp_M(;2PitsnY zD_Ma^E1z4zDCW*zXa7&>QpQ0SZ3~U4`fs^IVvRSQw&i(g{Gdg}K>sScBPG+j*x2{q z*P$8|Q7>*N2pa)l2i??gNEs^QkD=Eb%85HAOTxDMAJRi{6_m*muWC%c?ubkUKdTtC z{lN58^l-A18lfwG3n%h;-?w{R{^`n@b1;1d4Q|0=?RmZE(D0QeNe7S}Vf9|e({~FS z#HdA=-DX;~b@K!#2L{56o6?yp+B~Zk!@Jf;>lL+D{8sUc7%bzb-wjO=9pyXBQ+#{ko;6IVVZsYqsc$-)H4G8k9k_*o z7(+%*WMmFHQfqaxm+KW@C#n6wt2Q53ptoA)(arQ*=|1tfauCuY01_=-Co(0z-e;U% z=t{*C%=YR&^Fda;M_m%r$z-?M8v~mBWS`%;MV*Paf~{`$#U$JU^4FjHaV8=REQp<( z<8IWT&wH6m-4qA{cnGSd{Bu;6*9e8{?t27~_u3j+LbQSv8E5b3WH)|K2x(*NLnx9f z5ja;W*~xLA<1?niYU*;jcocb4@(=fUhNBl5^^#XKI@0p|HikvQ9G~%OPgP-RZ|=Q< zz98A&ZHNKRSpJ7fN4EVIo~!RSsH|qxcia&xRt z6?4NI$6js_K9J`W7dvgn8kBI)Sh%>Qaj_eHipb1zxGpX~tzxOf#~77gRu{UnPac8P z`*>5I6Zt#7NFNqZ@m;OhiN|f7%2wgyyaSSVIOncR+#fxopYQ*zn2f;_W6y7>2o^Q? 
zf{Stgh(^F42q@r#)-Dyrby{$ZB)*9)_O)Tv6LznqLJB)L{*wks-dExnuPZ zyw#lv9T=&m$d!xJNVUkma3TuCXQbNDr$A}lUC2=6QkERV>~G2>pA3+=#jtctge1H*u6z-0xSx8bqN1LXpyEz zSa=~>-xfr0&sXWoYk&ylm2?=CF)4|FV#9m+_X-*?bpKm78`^?FwBr2L1qnN!#Cuzx5jYZPbIuRtTSJv!suu*?KAcqyr~4!g zBb*5mKVTUAXpZ_I<6oOuP2;Sv%``x3inSF=VfQ&DP^whf@(UK41#wZxE|XsR?9kb} z7m%AG314@W7?%{kfL?XN_a$ZLlAbpnRjLw~W}M06>U<$wqKek)RONQjtg7ld9~?r% zM{((Edf=tcf~}{{$1^b>4{ePb1Fw}M8J^hvBPVZ}-7^r6e#)<1qOR^(pc+Gft+~U` zCemiZLmAS+~fHYjQMnQ@I9~@o))&k7$ z%WLq}ufEqoo1M+d&Z6?Sqo!a?bTsAy85sdCzpf+a}cqz$zN$Iv}f#Y&e!~O3-BG z%0j--rnAn0EAT{eht&Oqi%zhsW(sfI&Cg5(T$wy_@0XZtbRqXXTft`K^)^mIn}Nh2 zPyRqjz2}?P3iaA|N&Dcd0bx`61p408vgTQA;?$nNHoDKtFCYxWej^nn3F+wVr^R0k zF1$q&N;MPjP1ZCFixK7&5f6$UFs3zguwsoOAdeU;^myb{<8^q&=@ZfDDo@66Tl-Xd zDm$5go{A{Xx?GY@m`CXA3ZUhne}t47et1^FUfNL2@>Ww$NejcNAEN*0P&#x(;W>=) zu&ND)zzim*cm0XsuciZ27~;+fwLy}7n{1C5aJ0Da0Tu#NOD9TscOGa% zF8iNo9qrV7z@6#Yq4Y+u-Ag+gxg1qq6Nlb4j&sK1-4%f~dNN>8yW}-Zc)3cFoVa@k1k)v5TP~DOzq)C=`Zz?e__6^?mwyc5} z7g$+ zFDSCOtL!M5MufDyY>ZXiNtumrR`mg)Bwk_YFR!z=iD90Nbsh*l0wI0D^HCNSWfp5x z-WjffDl`5M;jP_|KMrn}>~oso#-Ghz^ehlIn}I+$R4-Xx^Zgp&cBNklh9S#7aDZQa z0(4HvYU49+Ht>&S5KC&PMj0T~aedWfez)zG!%Zr7%JIrHe(P^jz8gb=+8|M{(dMn4 zp%*FYJJ-|+oI2zjN>4GjfUTKWYvl&c@>s z$UO@m6yJNl9Q!CYueu2~webcF23jBSlEr8A*D$vYeMj&bvJ6z^&kHe2N9p ztYuLC`oE1t1?qVf`rZnQN29hMx^7c`_}v0*wQ#wNJBS`euJRfAk0<9egQ}?{|Y7AEKu_clkhrbKn z2zWO_k@Ag3>*0x=H~k3+n}x_XE&C4s*PwXmzf(nrIUY3p%8uak&i-OggZDV+0SGK} zC^UUHuVwG^slv>phyh;+6LBSOsiIc;|5e>rhE>&d?Jh#PMY=nskp}6O6ltVWL2`@4 zBeh9s0RfQ|q&F%^Bhu1Rf+#5|p@0Z_7RnCa_xaARbDis){X-W!=9)2Lt}(~BZ<={1 zw5c_(IS)N*<#t_=PS~f4uaS;F)W)aeXy(;h0Q8tQAJ?3#UZoYA;wbApq28_dT*Q{L z`!g)+e`tQa+Oy71?x6iMsXy4e!%#fNf9*7(>-F)%j+AT`EqMzWz7Fct#g_@UF7=6+ zQq!pSUsaYOD4(o+eRA1X`FTWTjv;>q@!P~mkUZ{u@<1j>7dBIoUd_>3?nm>GE3+Y9 zt=yzBQUX|5$NJK*Zr?En_X+@>TV$;Iol&-xmou(wnj-0%LqA1Y6ost!U{;O9I}PtH zen8^P0DXOL?{3O>tQy^rq2cGrfHh!#ib)Xbq_fw&-j{Ln7U@F%GDXCa10x1QjFY%6R5OY=GNgH}#$DLu9MiswoOG$F$tELfG(eNEn}k-(m3bifvSb%xyXP8&7Fva>2^U@S+hys!hok8!sD^$Z&RhTIna} 
zkFE1$B#`Ctp*sz5IbcT2vOp+MDIsj?V65=Y@J3%UVfxK{n_7yASw*@Z(=57+EYe?b zPCL2*fcT2_^;$DB?eWn9$b`*okl>o32W?b>ivhG!V1z@KDRgIawAi|lU|#^(_EtCT zDpmF5l)?~=$aYP2Lif%CId7_uS*E#;C+CDVYKmgzL*zyuXX8`;G27rM6*j3GELa4=4&`nKvpwx`VkHLOQ^508*TFM z_t1PpyN9?lw$6?B7g(J|n+B7MSr%+Ly(4vrNK2Tay(M#WNSR_W6qe{Y|_aDUF;o+g4puXC{SCd{y zV_=eLVY*_*@_?Y>+8=}m?3Hsxe*!lGZvgr@OlcK*0tc#&xNKc7vM*;3cAa3-^{C}M zj%AvRy?8n}Yhn;aqLFUp%r3!{E0?nE8~bH`0lRe6rd5_vM;7Z3df}xXNbAy*DLAtt zq-kW1BjT&-qJR18YOdF8ih$>aDjbFYV;?SQdT@!i4JAxB1vi`#e>R-dR%X~In8bQA z&rvY!ey^dbm`?o&dDBl}9m)6@M>$gED7_5ha_9iGZ;_>bKQ77`F>O49WKw?bq($!a zVLGE9t-y7Jc&tNi5DFc%qfvH%gxvR*Y3_(jI7gWQnjwXJi$(aHgW5Vqi=~jix}K$mDoSA8^$rjr01l1KeU?&B0f5G81(YYY&VBz876vu zx%%?*Eqcv6%!Ij;5({yaB70mB834IFLK;Ni0-~}5T#D@F6!gR9j!!nze2Bp)c(35* z7*Te0+Lajfrni){{)PK-flZ?nUS;~(xP$)?etP+7Ak6zgj$WmLKmtPT4_*X1`<&*9 z&}@m3>~FP~;`+=w=*+sZ3cos~$Do;}m`D4tZ~VxXF}a$;2P9Jbp;UL*Zzu&Q%VJ-_ zlbGeWBW|>Olu9h)uTEd$7VM1y6!}vM(3&w8a!$q-AzyNfm5@l| zUzY25&(fvR%a1=WxVRh8e7}L<#gBKSFtJ|Ge-l6oUi6j3xk6n+;<~N9v{k0GoD;{U zLdE63$TPq^1Gxv;lN9&Hi*ursb{ils<9U7glbE?JS0Rs6`#-P?qIj0S6wMgq3VbGY zd5pWYuWfU*~oPF zSwzzgDMI!rRuPJ-pXuaNb<_WmUMGq%!Sacj=aO7n$1=YBAoh;1*pgChnj$}&oBpgx zX4;!v+fZT$J2Ur@AI}H}NeGhwfWWleo2}8hOwq9z;zpCun4DjMxqyF%a|rlipBihaF>KQsZP_oea-5fTcVOlebn-`P9JxV2jeh4Fir zgm#_^l4an)L@#8z<;Y-hu01F+|CU*}ZLFOv4gC?x+5Px)nz%YeFrOyu4<6`PO zzJ80fk3#a=n8oMhJ&FV{mQK#czCC0zL!}k`=(QNe^TJ*8q{MaST z+NViMx@`KAuI3lxlVp=hsiw+AbMHXoV51JXR!sDH0Py6Us^w^(S}Caz#FBu-}MzwOkIK%zYu5aQdM$(NXVnWgLwH0c)#4{dnt&Io_w? zc|AzkLd;XHVjHG1NU(l%kgm)O;mT~6+AQxE#ke~4HV6shjk9&Q3Jo?oQN~z1Kism* z7=@sV%9AxYEHQWg`Dn>>6FYGKx<^2;!5h2ZqoMFeTX^Mt-P+&#^=JBDz{(AL*WWf5 zH5x?1K*JuCrWN}OC>I-OLQe@j{{2$|rM;2`ri<5cq`YeCtQVN?UI5x(6{L=eCucS* zQRpSFqqhx9F6Gr6sF~F? 
zu=eGV+2=>I+?SuZ_2Sa&CoOq6)q&6O$L=T1VI6gQXZ{@Xt*)de{DvK*Xlm)CI3XO9 zUzVJW8DmV6@wBHP+F>@j@bj}v_+pIJ(18n!XcUeJsYQsD%^B)9+)-Z*~yS+$1IDNlA{y%75Bx17BuCB=L#7rA^*->ecisQTJ3lX)v z6)h57f9rhRGLpAva8G`K&J-bA$4=^yirO)R~fERc}0)6m#8N%dJ(Z} zbnz{+Q&R&QZK!sHFm)OUKXmm)&@Bpo?!3|l_l|rmq3Sz5&i?QFyzn}`2-$gfdtt2g zJ^MY|^gof^_EXkQ)QG0X8xxkLMa`^VuJw!IFqP)!$Z3K~7C%KD~QW&LE26x1n2xa3<9}IU??| z@(TTk_U*R@QJv|Tf`A-?wh?NG$+tmg-+JnOV$V_iTXp_|H zR%}=}v!uXxq+6~rs3Goq42_6wHyh@TAv4tDD9a*8>yjI(IiaNQ?$oSlMK|`H>K)k& zt^fG%w$MCEzKPmlk@wAVErkN`Vv?`oX*6Q`b@@ETRV`RXlLGc z`p`;|W8fFy@@mS2DDpUzr5z3efy*j#TD)=7LW*{sD4`ZsRveYW+*fRpN=iwRTI)nI zma&ow7VfkaH11MBiAgXve%n5wv(AU!kU?g~lWSd=X;D$W4wH=6XV4S8)&GttN?5Eh zFmlnAr2*3^FU@Jr&l6^f(0{+h%nQT?^J3^9gAe04rnLw%nioHNC zJgzpTMQjjErN}WWlS7@)A>@AaAa74IV`|SmR7pooUOPc3uU*^w=4!p?I;q#}N2%9T z2J$tsqqED7g=(xl)DBbcvovsv49Sz=1^hKqdjzcgb}6^A`*sL1Us|m`>4=M0x*2mEaMkvkz6*e( z#}bjgrTktxHPXvO#{^&?XeW?qYye^C$JRniq0Q)@%Oh2vs_7`d#um#a$Pt@n;0|xY$d9y6r8vX|fZ`l)B9C9HDZt@@FNFXX7ej%$NXWzRBcyk17q*@KwHVAwD2wb=_&p%-AK!0`VPiUFUzoaW5MenRv9nzwZ5)QlemNaL;kL&IEWFliG^ zp0kV=D4N&bM-17SWpAloj=t}!OPc%%OxVPi zW3K%LaEB~O?IP5%;p!JT+VGbX1bK|LU;2o%t!yW8s**F;P2v$)iqt`6s5vNbs! 
zJ0E>jw_C<;I=V$Re}f}vG(MuzgOvo!N-uCI@)Q5Ji0|!5SaVT^T>6CrF|0dJQ!Pio z#K9t4y4pOgM=xg_DE|WPdu4;HUIM;yUGm06Q{O=W!H5OAedafmKNt*YLLt3h-_;oM*p555(d9R~g<8AsO zmq6u-!q>-lm8U}|y(dCih@(HbD7ZD&HRrn-+&qr+b%%RCH+x{4qz-2REq`7m6IF3GJYS095VPtM*oahWPWfipf+%` zra%oj7ZLh2d7ZaTDMVeppHgTUjtR1EMkN;UQhuTDR9j%n`~@(N;NHHVBV#X3cTMmk z(PQ|uFwa(+ z{KQmsL9h6}Sl$5-+dfkrp+x3tF3(Z;S2l!v^C4z;6E9J|#y3%ruIVbl_4Mc|`-^_A zu4YZ)Q_Y2gTrWIP1O}Y^opbe|J|6~Xp5INcuvNAeJCg2_l(%V`ar=d7=9;0f6iUXx?o`HA$+xfiu@shZ5?CuB8K}0S|akp&=g3fzDenE5MH%%L^ z4eW7gd;K6|g2Jw6_u z#pA}DA(7V6C8z>jW5>OkX=NW0&BISjvKLgebOtxu;sfk}V(V3pv)$ZE@k-h9JYpB=_SNWL5>=&B}ti@iGy zmt0kg?ed}~3=AtCL=(do5WW0UPzmi1l>}Cj48mx?&#-3S{$Ieeyt`#)#VN*jmb=Gb zSazRC)@n=4S{6)6sna14a;x2PAwNO>(c+tp#NK?NkHLM2RYyly8P80M&P81f+hPJC zjHLP(&`Ql(75eEF(?cA2l+IyT&cO#uS60)gD509gV_=t%R-tObsnE-2dN{WrRNaO$ zcGUH}V9@@Sr}_zL zfpzW#K(?nrZTtrGeg=kpgKMv7_rMg`MYseei2^$1tcMS>vsf6 z6~f2L>!YtRE=CT^|733E+@`WSC#_0>TRXT4b^;8?UHQyQ;!kwna?}WJDs*%%J&@8Z z3W$Cq^CQ{OKZI|K)K}Wa^uw!V5E6D+Y?p64ZS(TJD^;jCJ3_>Honf%yvOMDHgphAE zTXe&Zvnt~6rJz+0!i@?x+?HT9DqwXnZ3X`mA7@KJR`QP@^C8)oP6^GoQZ+A9C+EzJ z!!Xly-(o_cSBXvQssn9U1mE%fRD?VVOK)AK?kwWw9T*t(CnXsKXtuWmoU_I=LawR% zJ*?Al)s`hmDSOP8W^1Hvvpv{1UJw~%2s!^Xa;x9B zv7a?>I4Ws-!z`PX_4W=EY*r&AwN;NQRI>Kf$;!*|uaPF!*%QaPiF7czHPwzu@g%4; zZWz9iB0+p>uk+criyx2ME+e{%asOwMPj@ZnkB2Il=Zy<&6KIG|c?hfdF6m2QL;SV! 
z*ufKdrJd(vWe(4ue6J;$%G{}T(KXi|7j3&9TWG+#I zLxf{G-0SjX3VK~Kg2`-Edov~;Phy9eS4|W5euOp*(?5Im)@i67KL_BP#cIrjo$lPb zhOfz+fbG+H@W`=T=PKueblUX@@!I!zZfx~AtrUAhul8DJ6uSYuH&K+3QrQ~PUIkcw z)6Fm)-E|F1;W>Hk2bug29~(yCbmVlhYF=(x7;KfFMb88dEaA68L);zR97&`1#<-m< z@o8yapU=rpHR1HyO)M6=m`HOMgIeXBPDisV0TE>Y81geS>OZ`h>+neqhmv54X0y^t z*@bnp(ToR-D5!kl!qfaOIqHW$`-C->*n3O99?>&^auzJo*?twmGvyhGb{3H_`ruDW zn(+R)FQokYA)0p{c0Ab!ysUs8KZ)F%?aPfOCRsy|Jt?IyABFki8~1EQYB5TbaO?=` z=%nbRg$#ac=VvB^S61JypRYD?|Lei18J zGaQu{{v*|)pAa##V-UM8t5^T13cqbLZ%)KDBP5b5GuV37u&%N6MVT~K5SiTz%0 zMlaG~yjPme@$BDvwZ$r{<*d)GDM2X$P1u)3Tik=+4(;uEpWq#s5p8VVX(S7P+7r0k zoaYZ0Zdr7}&NTu54h4u=z7^{e$SexHOIDrpP>p!v6`!1(FC7;bw;zXHiMH72JbZU2 zacAXN=|wzt6)rK+?ZOKn*CE3Fe-lqEUfd!pjk_OAe{>_d+^|9_D%r)1d@r7dwN;^~ zr#t(6%oyPg+JR4K@hxW582;#aQVK)I__20;Yu|6>PLh900MMOlqSCqfAiQsbz^70o zu68|0PM&6$7I-wC4CPv(F_n0|>#R5u_)NGd?!?|c_%#CH3y9TBAY6Zz}m-l<` zvM%jb?+)uyCo@Kw^5Y3hCoEHV&DuMhggSu_3jz@b*|)bSB;pipyKkx}8(;ikd{w?8FHLXdb+{3wuqHEM1hh?meZpk0Np2CNM!AxOi zk=lGPnE2SPbYfg3)nbT+eSYCo9XkST6QPe7$VQT{c}xBn>wMBFTWT8Nqg^3y&pDX| z?LWcKRXUZ|-$fW4jIOv0>z=r&*z~0JK)7CG%gAO-YPhncsb7OmOtZ^>v>1}Hf1J{z zit+kitrkWzQU2@O8FtzsliugbGom-S)v*LCs@ zygzxUCE5A-*!5KPGTdLWW?kGjuBl4Cd9HX4eMKYsURz3vV(Bc_&Bd7x%FpYclO`La zCVg;pv{#V&$k30NMrrtr0KOAQ*WT6QY%X5$g!k@U4xZE;7|}o%eK(!O#qdtY7b~A8zIPGhR?Aa1U9pjC3)n`;2tsVoHWg6bMyqH8lctnQWEj%6 zt~X$c`60bNwhegZpNZtS{Pfua$qfZa=Kli3q-iACB-GAR-$Fe@0RlwLDYZA;aRcH= z@2uJy`Ja#ddMEro>zN+T!S3PW9NNfh0Qm$@e$R@4@N2%m&xfTK=-_Gff4}~6OvHrz zJPtpqhlF?VbOC^4fW&9f&bhTy@hhIdeWGl456UB`RVcg-28ub+$Io8M4l-{?MZDDE zT;Cep?Dv~1u* zJo}$Vm(KEg;X#ucHwagYf-Btt0Q}+@mLsM=d*rS}R9;d;tNdL43$SOT?U+RU{n<6j zK{&2DYN9As>sj3~?3GD%AT2P9IT$u zv-Ys9^^aslXDasy{4lDXo-ky2m_V>FOl;0R6Bf=kc_zC<v}v zpMyJ%?@s03YyR+bIs=$+nu@71zc}=;@f}?axv{}ZoDm(}@f*>+eQ0AtY)RC?j9k;6 zsNW*4xn%W%^Ra+`nYbe$_KZ>G9g$qx6`jF}WS6=R8Q9lk5|f2bJ=`^{b0JX*9jO0>JD&&#U(u+4VM#wX&6eHF8d=+olc zR;nh{L0xa?{|9hghNTcew{mH2?G+OVrPx&Myn@l`B)(79T7@aH z=rV@Vk(CjxZBgy6k8Zu}IU{w(wiM6RwC@_xSxF?p+WJndqRd<#SlO~^$GhFsyjb<) 
zwx(OI+CJ8+j)t2E29`ux5yboL2Om-=Wge^$`bNUwHRCqo#)x;rwwk#sA&dhw8-$Jk zVdm+}BC#RX?_riUh!$R(RUAG)&1_+hvq5RMgSMksj;I3mlskxIsG1HEjn8rDxB4^s`%P?%LoPoZVsYGl3#HtYQ$HQum=>>b`MaI`^B|5&PX z&Ea}or}n6p2zb1EyM#SD!mI9sy~(9@7T!0bV9od#)sdt<%R;ff=T2fzed_-9MQ5E= zFRKD!e$r$XZJw_QkKGezf7r-QzQ1c|u_vD)HbIP-Z?OzO+!y1zVa={iN88Y{kkDoN z9+!-+iRTOn;1RCftG^2@cV4VMw!d_fBDzgN*~(C6;V;|Pc;`P>f}XAN&!ij+*C2Ch+gfVTaSB$Jl;z=Dpo{O@JJ~JCg80nRil$1CFOoHUJ?IyRvvTbB; z8OD5bYQZ2VxwJlX4@;;G%g3HiaZaNs%E*)kZ|`a@gbCVB^yfP`Uj}tM=b2gQuvGuN zLD_%7r<8(~Rk3HZu7ON7g*UD>0Y*;#H)jqwQ)usWcqHlanVzh`GTrglHTWL=R?f^! z>pO7?n~OuI*sk2FG-hsEhxECSj}-3_vX70B8JYnAMaogM=eflfH(C*}*1t{M386g^ zR<9YH(BvK&;w5z}4kY&zr$k|`Td`4>Qd-A2`5rPnNW4Chyc(D>ZLy$0Jn(1e)VjRy za*qh?Lh%hlf9Q%_ypd8-fmNtz8JEJl|AC_1m=3EkA9k8}?>7GLYFhvU&rSTHCY92d z^N~8m^>XC~vOO~m5RZ>lKRu`uGM~5p^ZjVnYSKx2Wpxw^RS(tx^8NmFqgw8A-Ro(C zMM*VO#pN+H+%GSAIp_3>NJmyHul>{VsB+tGwz$m-oWfY)jCRdWx*Sf_gxCbC8dq%T zwVYXH(VJD3r5+&kiFKCWCoR(>BiJ*72-U8!y0f}MFekcPxwtL1O=zjsgpz8ADc&bs z{v0{5t*Je*%_4F@l3#EhBzl{=&}{l8^z|Q$9OsEP(+jFncM4y7XZ=QeeK<2A*LV>P zxw1029dpX@96(x(5tM=n5*m~b#!Y>Nwz`!;wK7-V>PjUGb9?nOC^ihb_|J!0X`o7^%d2HCz0$Or8#3WA?V*4w(M=vrb zU^s>#EWORO4NcQt@;8LzwJH^QgY4YefvS#VpO9|k{{^hvuck5V5&zSGd=4eEIG&M zpHDbz;Q8{#4@vHK>I@_aNk@j$8lQETh<^dItcBRN7*(4~CB{qvic(6bUpJIX<*U-3 zKvRLFEt7-|vDTm1ZJ>S*Z!Qu3YXv_*>gS-QuK{*AYoYY{%@WB&kghlSA)Zm0ox3;Z z@{*UfJ;_=x6`#)sopj&joPcO)dfIg_>Csn!ElMnjd(-E3Y3CQs&y-FWT@&w8WTNy6 z`9No#cB#wStCa*7EMhPuKssFtd4lPC?hXZ-l(@Jg$A;Wjsq02ll_4Sx)16EV?Qfr7 zmJX}R!?r+Hew4|EJ#w}1$F^2@k#VV`^Xix}ua~>j>nIAa6fT$O2*UC_;0``_e`P!GAH9z3X zD@=+(p(nuf$-4UriG6n57hx9T)t}2>&?Z9r;DaQB|C}-Y+y~#Bso_t!gS+znezala z0=b5krbvCXf6k<{)5E4HMw^wHhxF}}!x)tpw=cYM)4oE#4N1a1Ao3{nnxzjy_iV?F zDx%0?Bo%k*p}553i@Bt!Q<|Fg~I45C)}k`lpJ4%-H!!su8pMT5r; zP%Ws3E|BK>TQ8py1o_=DEK8JOre~5yWo5I5iyuolXR)CEgphRP{kX_zsCnX1h4A|I z=N88V+bq&UXH{H@cuW|)H6tx07n^zW5bLcjfkZ;3`QEg##)0q{q(VBC zl_4vR7*-0xjpt&90q+NP9!kNDSc4-;lHkEZ4Ve?^x5Bd@NZrhaT{I1@F%1k1DL^AT z5i(C7ikvw{-p86G5!~K4FpJnBnVn3~*bf-BXp;x$)5VPJGT?ifow{B~jH9N$2|6%v 
zQxE^_FC~Wml6Cpwn&NO8iyKh*w2GB0oSHhY%}?Q9bPTmODC>xXIG8eluaej|4?bC1 z*KbD3w7SZI4PnJmG#V8=L%Y;YR3OjBf3MEH?%+Fq9N%%N7GFS9wR6oV23mwV?Vb7T zYYTXL+0+*Hk`fm->E0VG)=M1&z8xqech=S39$E<>H$GE_VcnQA^@!`e=_tgfsKVS6 z=V$Ht$n18IqeR@} z6uRi2?McO29;yoFJshx8x7xJ&EeDOyG7K@pD)Pl5Wpv6QALh38{&nPUqDnXn1Eo z9>?zXm8wRX4RCwEFbrRJCTV~^1ioOBPaFk>5Xh)ix$8MHgiV4rmnZ|E8q^ye&huasl7*1`%uXFz149nuoR`9`y*?`Yb z#b&i1GDE^03|14hpg(DJZuCf3NyD~T2TuVjl+Mt@#TN&LZ^`ax?#P(UtQ4`sVOpXcjVEMm`Jzs~2Y$$@Z$7k|yXv;ujP zYp?hVK(SMc6%RFCU*E}xz!;+7hK@mTa%FTXmhTz0?MFCrwT8>Ka716*_A%(F`*{>x#%MX2jw2CILmS&$RqnFXq|Y5 zj%JSpT?UU6HOI=#pXW=$DBJ#{cACE%fWRun*77-3Y<+o zf*#`|0O#6O03^<0bZ~4yFtM-bC(8jP2|%0(mV;8x?wI55$5MfX!aFv%i!L@6AA%gN z7xT`V|NX)%Ktb*U5MdZV2(;Mo$TP$?042W&xWEZM6Cj}I0RX|K1Ym@NorH5Eft`dO zyZ!EcaLMnq8Zef8Ua0n_THCcz~@lmn;|SBwwKKm0I;AM zHwCjos;JS7^^p+Bbc<;oftBPdU(5I-Jrr>J3ewLaMFJrcz;EqGNC1Eo0f6wy?{5B@ F{$CBJ0$Bh6 -- Gitee From df064f45386340ad648bafd9cfe87f8d89c4ddff Mon Sep 17 00:00:00 2001 From: tanjh Date: Wed, 28 Jun 2023 13:26:48 +0800 Subject: [PATCH 077/112] =?UTF-8?q?=E8=A1=A5=E5=85=85FAQ=20(#10074)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 增加两个实际训练过程中遇到的问题及解决方案 --- doc/doc_ch/kie.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/doc/doc_ch/kie.md b/doc/doc_ch/kie.md index 26d2e560..a79ce3eb 100644 --- a/doc/doc_ch/kie.md +++ b/doc/doc_ch/kie.md @@ -505,3 +505,13 @@ RE可视化结果默认保存到`./output`文件夹里面,结果示例如下 Q1: 训练模型转inference 模型之后预测效果不一致? **A**:该问题多是trained model预测时候的预处理、后处理参数和inference model预测的时候的预处理、后处理参数不一致导致的。可以对比训练使用的配置文件中的预处理、后处理和预测时是否存在差异。 + +Q2: 训练过程中报如下错误: +ValueError: (InvalidArgument) The 'shape' attribute in ReshapeOp is invalid. The input tensor X'size must be divisible by known capacity of 'shape'. But received X's shape = [4, 512, 23], X's size = 47104, 'shape' is [-1, 7], known capacity of 'shape' is -7. 
+**A**:是由于训练使用的配置文件ser_vi_layoutxlm_xfund_zh.yml中Architecture.Backbone.num_classes的值与Loss.num_classes的值不一致导致。 + +Q3: 训练过程中报如下错误: +IndexError: (OutOfRange) label value should less than the shape of axis dimension when label value(23) not equal to ignore_index(-100), But received label value as 23 and shape of axis dimension is 23. +**A**:是由于训练使用的配置文件ser_vi_layoutxlm_xfund_zh.yml中Architecture.Backbone.num_classes的值与Loss.num_classes的值不正确。 +由于采用BIO标注,所以默认会有一个"O"标签,同时会忽略"OTHER", "OTHERS", "IGNORE"三个标签。PostProcess.class_path设置的字典文件中的每种类型会自动扩展成"B-"和"I-"为前缀的标签。 +所以假设字典文件中包含n个类型(包含OTHER)时,num_classes应该为2n-1;假设字典文件中包含n个类型(不含OTHER)时,num_classes应该为2n+1。 -- Gitee From d37209e552a39b0ce86cbf850f44fcf25d8ab539 Mon Sep 17 00:00:00 2001 From: ChuongLoc <89434232+ChuongLoc@users.noreply.github.com> Date: Thu, 29 Jun 2023 10:21:11 +0700 Subject: [PATCH 078/112] Update recognition_en.md (#10059) ic15_dict.txt only have 36 digits --- doc/doc_en/recognition_en.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/doc_en/recognition_en.md b/doc/doc_en/recognition_en.md index 78917aea..bf14a3ea 100644 --- a/doc/doc_en/recognition_en.md +++ b/doc/doc_en/recognition_en.md @@ -80,7 +80,7 @@ PaddleOCR has built-in dictionaries, which can be used on demand. `ppocr/utils/ppocr_keys_v1.txt` is a Chinese dictionary with 6623 characters. 
-`ppocr/utils/ic15_dict.txt` is an English dictionary with 63 characters +`ppocr/utils/ic15_dict.txt` is an English dictionary with 36 characters `ppocr/utils/dict/french_dict.txt` is a French dictionary with 118 characters -- Gitee From 03ceab9744035b3237202075501a51e7d36d06a4 Mon Sep 17 00:00:00 2001 From: shiyutang <34859558+shiyutang@users.noreply.github.com> Date: Fri, 30 Jun 2023 11:26:03 +0800 Subject: [PATCH 079/112] Create mark_stale_issue.yml --- .github/workflows/mark_stale_issue.yml | 31 ++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 .github/workflows/mark_stale_issue.yml diff --git a/.github/workflows/mark_stale_issue.yml b/.github/workflows/mark_stale_issue.yml new file mode 100644 index 00000000..a567c2e8 --- /dev/null +++ b/.github/workflows/mark_stale_issue.yml @@ -0,0 +1,31 @@ +# This workflow warns and then closes issues and PRs that have had no activity for a specified amount of time. +# +# You can adjust the behavior by modifying this file. +# For more information, see: +# https://github.com/actions/stale +name: Mark stale issues and pull requests + +on: + schedule: + - cron: '30 1 * * *' + +jobs: + stale: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: none + + steps: + - uses: actions/stale@v8 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + stale-issue-message: 'This issue has been automatically marked as stale because it has not had recent activity. It will be closed in 7 days if no further activity occurs. Thank you for your contributions.' + stale-issue-label: 'stale' + close-issue-reason: 'The issue is stale and not response is provided after 7 days.' 
+ exempt-issue-labels: 'bug, feature request, good first issue' + operations-per-run: 900 + days-before-pr-stale: -1 + days-before-pr-close: -1 + ascending: true + -- Gitee From 3a0542e7d6cd7fe82839cd43f8c0b512ee8fbb3c Mon Sep 17 00:00:00 2001 From: shiyutang <34859558+shiyutang@users.noreply.github.com> Date: Fri, 30 Jun 2023 11:39:24 +0800 Subject: [PATCH 080/112] Update mark_stale_issue.yml --- .github/workflows/mark_stale_issue.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/mark_stale_issue.yml b/.github/workflows/mark_stale_issue.yml index a567c2e8..8775dffe 100644 --- a/.github/workflows/mark_stale_issue.yml +++ b/.github/workflows/mark_stale_issue.yml @@ -8,6 +8,9 @@ name: Mark stale issues and pull requests on: schedule: - cron: '30 1 * * *' + push: + branches: + - release/2.6 jobs: stale: @@ -19,7 +22,7 @@ jobs: steps: - uses: actions/stale@v8 with: - repo-token: ${{ secrets.GITHUB_TOKEN }} + repo-token: ${{ secrets.PAT }} stale-issue-message: 'This issue has been automatically marked as stale because it has not had recent activity. It will be closed in 7 days if no further activity occurs. Thank you for your contributions.' stale-issue-label: 'stale' close-issue-reason: 'The issue is stale and not response is provided after 7 days.' 
-- Gitee From d983e555fdcc2d7d69ce14585924fa28f6eb40ce Mon Sep 17 00:00:00 2001 From: shiyutang <34859558+shiyutang@users.noreply.github.com> Date: Fri, 30 Jun 2023 11:40:20 +0800 Subject: [PATCH 081/112] Update mark_stale_issue.yml --- .github/workflows/mark_stale_issue.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mark_stale_issue.yml b/.github/workflows/mark_stale_issue.yml index 8775dffe..d6e86de3 100644 --- a/.github/workflows/mark_stale_issue.yml +++ b/.github/workflows/mark_stale_issue.yml @@ -25,7 +25,7 @@ jobs: repo-token: ${{ secrets.PAT }} stale-issue-message: 'This issue has been automatically marked as stale because it has not had recent activity. It will be closed in 7 days if no further activity occurs. Thank you for your contributions.' stale-issue-label: 'stale' - close-issue-reason: 'The issue is stale and not response is provided after 7 days.' + close-issue-reason: 'completed' exempt-issue-labels: 'bug, feature request, good first issue' operations-per-run: 900 days-before-pr-stale: -1 -- Gitee From 8c9f43f44fd5d5be9013006814c14c1a4c7dfadd Mon Sep 17 00:00:00 2001 From: shiyutang <34859558+shiyutang@users.noreply.github.com> Date: Fri, 30 Jun 2023 11:42:13 +0800 Subject: [PATCH 082/112] Update mark_stale_issue.yml --- .github/workflows/mark_stale_issue.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mark_stale_issue.yml b/.github/workflows/mark_stale_issue.yml index d6e86de3..973846de 100644 --- a/.github/workflows/mark_stale_issue.yml +++ b/.github/workflows/mark_stale_issue.yml @@ -22,7 +22,7 @@ jobs: steps: - uses: actions/stale@v8 with: - repo-token: ${{ secrets.PAT }} + repo-token: ${{ secrets.GITHUB_TOKEN }} stale-issue-message: 'This issue has been automatically marked as stale because it has not had recent activity. It will be closed in 7 days if no further activity occurs. Thank you for your contributions.' 
stale-issue-label: 'stale' close-issue-reason: 'completed' -- Gitee From 4b275f1857dfd44524db0860c6aa9bfcb75a7a81 Mon Sep 17 00:00:00 2001 From: itasli Date: Fri, 30 Jun 2023 03:59:38 +0000 Subject: [PATCH 083/112] fix undefined save_model_dir when using wandb (#10251) * fix undefined save_model_dir when using wand * Update program.py --- tools/program.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/program.py b/tools/program.py index a8373435..209212fd 100755 --- a/tools/program.py +++ b/tools/program.py @@ -685,7 +685,7 @@ def preprocess(is_train=False): wandb_params = config['wandb'] else: wandb_params = dict() - wandb_params.update({'save_dir': save_model_dir}) + wandb_params.update({'save_dir': save_dir}) log_writer = WandbLogger(**wandb_params, config=config) loggers.append(log_writer) else: -- Gitee From 1fa3575030fde2696eda53fcc92b6a95ebf186fd Mon Sep 17 00:00:00 2001 From: Wang Xin Date: Tue, 4 Jul 2023 15:13:23 +0800 Subject: [PATCH 084/112] Update ocr_rec.h (#9469) It is enough to include preprocess_op.h, we do not need to include ocr_cls.h. 
--- deploy/cpp_infer/include/ocr_rec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/cpp_infer/include/ocr_rec.h b/deploy/cpp_infer/include/ocr_rec.h index 257c2610..f3712cb3 100644 --- a/deploy/cpp_infer/include/ocr_rec.h +++ b/deploy/cpp_infer/include/ocr_rec.h @@ -17,7 +17,7 @@ #include "paddle_api.h" #include "paddle_inference_api.h" -#include <include/ocr_cls.h> +#include <include/preprocess_op.h> #include <include/utility.h> namespace PaddleOCR { -- Gitee From b8888627a59985f361e419f6f624a77839c8ad14 Mon Sep 17 00:00:00 2001 From: livingbody Date: Tue, 4 Jul 2023 15:42:07 +0800 Subject: [PATCH 085/112] Update paddleocr.py (#10290) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 解决多次识别pdf,受第一次页面最大数量影响的bug --- paddleocr.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/paddleocr.py b/paddleocr.py index ba707d66..9b7f4e06 100644 --- a/paddleocr.py +++ b/paddleocr.py @@ -532,8 +532,9 @@ class PaddleOCR(predict_system.TextSystem): # for infer pdf file if isinstance(img, list): if self.page_num > len(img) or self.page_num == 0: - self.page_num = len(img) - imgs = img[:self.page_num] + imgs=img + else: + imgs = img[:self.page_num] else: imgs = [img] if det and rec: -- Gitee From ebfbb80dcf469f2603d22ba02ccef833f1cdfc77 Mon Sep 17 00:00:00 2001 From: tanjh Date: Wed, 5 Jul 2023 14:58:00 +0800 Subject: [PATCH 086/112] =?UTF-8?q?=E8=A1=A5=E5=85=85num=5Fclasses?= =?UTF-8?q?=E6=B3=A8=E9=87=8A=E8=AF=B4=E6=98=8E=20(#10073)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ser_vi_layoutxlm_xfund_zh.yml中的Architecture.Backbone.num_classes所赋值会设置给Loss.num_classes, 由于采用BIO标注,假设字典中包含n个字段(包含other)时,则类别数为2n-1;假设字典中包含n个字段(不含other)时,则类别数为2n+1。 --- doc/doc_ch/kie.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/doc_ch/kie.md b/doc/doc_ch/kie.md index a79ce3eb..d18a0663 100644 --- a/doc/doc_ch/kie.md +++ b/doc/doc_ch/kie.md @@ -205,7 +205,7 @@ Architecture: name: LayoutXLMForSer pretrained: 
True mode: vi - # 假设字典中包含n个字段(包含other),由于采用BIO标注,则类别数为2n-1 + # 由于采用BIO标注,假设字典中包含n个字段(包含other)时,则类别数为2n-1; 假设字典中包含n个字段(不含other)时,则类别数为2n+1。否则在train过程会报:IndexError: (OutOfRange) label value should less than the shape of axis dimension 。 num_classes: &num_classes 7 PostProcess: -- Gitee From a2ae303805506b55b53b9186de5170e451069d38 Mon Sep 17 00:00:00 2001 From: Louis Maddox Date: Wed, 5 Jul 2023 07:59:42 +0100 Subject: [PATCH 087/112] Update algorithm_overview_en.md (#9747) Fix links to super-resolution algorithm docs --- doc/doc_en/algorithm_overview_en.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/doc_en/algorithm_overview_en.md b/doc/doc_en/algorithm_overview_en.md index be05dd91..67acbe22 100755 --- a/doc/doc_en/algorithm_overview_en.md +++ b/doc/doc_en/algorithm_overview_en.md @@ -111,8 +111,8 @@ Refer to [DTRB](https://arxiv.org/abs/1904.01906), the training and evaluation r ### 1.3 Text Super-Resolution Algorithms Supported text super-resolution algorithms (Click the link to get the tutorial): -- [x] [Text Gestalt](./algorithm_sr_gestalt.md) -- [x] [Text Telescope](./algorithm_sr_telescope.md) +- [x] [Text Gestalt](./algorithm_sr_gestalt_en.md) +- [x] [Text Telescope](./algorithm_sr_telescope_en.md) On the TextZoom public dataset, the effect of the algorithm is as follows: -- Gitee From 890230e1f389967530460a49fc6e095b4ce5c48e Mon Sep 17 00:00:00 2001 From: duanyanhui <45005871+YanhuiDua@users.noreply.github.com> Date: Wed, 5 Jul 2023 15:42:03 +0800 Subject: [PATCH 088/112] [npu-tipc] update npu tipc scripts(#10102) (#10293) * update npu tipc scripts * update --- test_tipc/test_train_inference_python_npu.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test_tipc/test_train_inference_python_npu.sh b/test_tipc/test_train_inference_python_npu.sh index bab70fc7..ebeaefbc 100644 --- a/test_tipc/test_train_inference_python_npu.sh +++ b/test_tipc/test_train_inference_python_npu.sh @@ -35,12 +35,20 @@ dataline=`cat $FILENAME` 
IFS=$'\n' lines=(${dataline}) +modelname=$(echo ${lines[1]} | cut -d ":" -f2) +if [ $modelname == "rec_r31_sar" ] || [ $modelname == "rec_mtb_nrtr" ]; then + sed -i "s/Global.epoch_num:lite_train_lite_infer=2/Global.epoch_num:lite_train_lite_infer=1/g" $FILENAME + sed -i "s/gpu_list:0|0,1/gpu_list:0,1/g" $FILENAME + sed -i "s/Global.use_npu:True|True/Global.use_npu:True/g" $FILENAME +fi + # replace training config file grep -n 'tools/.*yml' $FILENAME | cut -d ":" -f 1 \ | while read line_num ; do train_cmd=$(func_parser_value "${lines[line_num-1]}") trainer_config=$(func_parser_config ${train_cmd}) sed -i 's/use_gpu/use_npu/g' "$REPO_ROOT_PATH/$trainer_config" + sed -i 's/use_sync_bn: True/use_sync_bn: False/g' "$REPO_ROOT_PATH/$trainer_config" done # change gpu to npu in execution script -- Gitee From b06840d49333e7d8e8433d549def07c5c5501598 Mon Sep 17 00:00:00 2001 From: n0099 Date: Wed, 5 Jul 2023 16:25:17 +0800 Subject: [PATCH 089/112] =?UTF-8?q?=E6=94=B9=E8=BF=9B=E6=96=87=E6=A1=A3`de?= =?UTF-8?q?ploy/hubserving/readme.md`=E5=92=8C`doc/doc=5Fch/models=5Flist.?= =?UTF-8?q?md`=20(#9110)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update readme.md * Update readme.md * Update readme.md * Update models_list.md * trim trailling spaces @ `deploy/hubserving/readme_en.md` * `s/shell/bash/` @ `deploy/hubserving/readme_en.md` * Update `deploy/hubserving/readme_en.md` to sync with `deploy/hubserving/readme.md` * Update deploy/hubserving/readme_en.md to sync with `deploy/hubserving/readme.md` * Update deploy/hubserving/readme_en.md to sync with `deploy/hubserving/readme.md` * Update `doc/doc_en/models_list_en.md` to sync with `doc/doc_ch/models_list_en.md` * using Grammarly to weak `deploy/hubserving/readme_en.md` * using Grammarly to tweak `doc/doc_en/models_list_en.md` * `ocr_system` module will return with values of field `confidence` * Update README_CN.md --- deploy/hubserving/readme.md | 289 
++++++++++++++---------------- deploy/hubserving/readme_en.md | 318 +++++++++++++++------------------ deploy/pdserving/README_CN.md | 7 +- doc/doc_ch/models_list.md | 2 + doc/doc_en/models_list_en.md | 38 ++-- setup.py | 2 +- 6 files changed, 305 insertions(+), 351 deletions(-) diff --git a/deploy/hubserving/readme.md b/deploy/hubserving/readme.md index 8f4d0869..9302bad9 100755 --- a/deploy/hubserving/readme.md +++ b/deploy/hubserving/readme.md @@ -3,7 +3,7 @@ - [基于PaddleHub Serving的服务部署](#基于paddlehub-serving的服务部署) - [1. 近期更新](#1-近期更新) - [2. 快速启动服务](#2-快速启动服务) - - [2.1 准备环境](#21-准备环境) + - [2.1 安装PaddleHub](#21-安装PaddleHub) - [2.2 下载推理模型](#22-下载推理模型) - [2.3 安装服务模块](#23-安装服务模块) - [2.4 启动服务](#24-启动服务) @@ -15,8 +15,8 @@ PaddleOCR提供2种服务部署方式: -- 基于PaddleHub Serving的部署:代码路径为"`./deploy/hubserving`",按照本教程使用; -- 基于PaddleServing的部署:代码路径为"`./deploy/pdserving`",使用方法参考[文档](../../deploy/pdserving/README_CN.md)。 +- 基于PaddleHub Serving的部署:代码路径为`./deploy/hubserving`,按照本教程使用; +- 基于PaddleServing的部署:代码路径为`./deploy/pdserving`,使用方法参考[文档](../../deploy/pdserving/README_CN.md)。 # 基于PaddleHub Serving的服务部署 @@ -51,120 +51,77 @@ deploy/hubserving/ocr_system/ ## 2. 
快速启动服务 以下步骤以检测+识别2阶段串联服务为例,如果只需要检测服务或识别服务,替换相应文件路径即可。 -### 2.1 准备环境 -```shell -# 安装paddlehub -# paddlehub 需要 python>3.6.2 +### 2.1 安装PaddleHub +paddlehub 需要 python>3.6.2 +```bash pip3 install paddlehub==2.1.0 --upgrade -i https://mirror.baidu.com/pypi/simple ``` ### 2.2 下载推理模型 安装服务模块前,需要准备推理模型并放到正确路径。默认使用的是PP-OCRv3模型,默认模型路径为: +| 模型 | 路径 | +| ------- | - | +| 检测模型 | `./inference/ch_PP-OCRv3_det_infer/` | +| 识别模型 | `./inference/ch_PP-OCRv3_rec_infer/` | +| 方向分类器 | `./inference/ch_ppocr_mobile_v2.0_cls_infer/` | +| 版面分析模型 | `./inference/picodet_lcnet_x1_0_fgd_layout_infer/` | +| 表格结构识别模型 | `./inference/ch_ppstructure_mobile_v2.0_SLANet_infer/` | +| 关键信息抽取SER模型 | `./inference/ser_vi_layoutxlm_xfund_infer/` | +| 关键信息抽取RE模型 | `./inference/re_vi_layoutxlm_xfund_infer/` | -``` -检测模型:./inference/ch_PP-OCRv3_det_infer/ -识别模型:./inference/ch_PP-OCRv3_rec_infer/ -方向分类器:./inference/ch_ppocr_mobile_v2.0_cls_infer/ -版面分析模型:./inference/picodet_lcnet_x1_0_fgd_layout_infer/ -表格结构识别模型:./inference/ch_ppstructure_mobile_v2.0_SLANet_infer/ -关键信息抽取SER模型:./inference/ser_vi_layoutxlm_xfund_infer/ -关键信息抽取RE模型:./inference/re_vi_layoutxlm_xfund_infer/ -``` +**模型路径可在`params.py`中查看和修改。** -**模型路径可在`params.py`中查看和修改。** 更多模型可以从PaddleOCR提供的模型库[PP-OCR](../../doc/doc_ch/models_list.md)和[PP-Structure](../../ppstructure/docs/models_list.md)下载,也可以替换成自己训练转换好的模型。 +更多模型可以从PaddleOCR提供的模型库[PP-OCR](../../doc/doc_ch/models_list.md)和[PP-Structure](../../ppstructure/docs/models_list.md)下载,也可以替换成自己训练转换好的模型。 ### 2.3 安装服务模块 PaddleOCR提供5种服务模块,根据需要安装所需模块。 -* 在Linux环境下,安装示例如下: -```shell -# 安装检测服务模块: -hub install deploy/hubserving/ocr_det/ - -# 或,安装分类服务模块: -hub install deploy/hubserving/ocr_cls/ - -# 或,安装识别服务模块: -hub install deploy/hubserving/ocr_rec/ - -# 或,安装检测+识别串联服务模块: -hub install deploy/hubserving/ocr_system/ - -# 或,安装表格识别服务模块: -hub install deploy/hubserving/structure_table/ - -# 或,安装PP-Structure服务模块: -hub install deploy/hubserving/structure_system/ - -# 或,安装版面分析服务模块: -hub install 
deploy/hubserving/structure_layout/ - -# 或,安装关键信息抽取SER服务模块: -hub install deploy/hubserving/kie_ser/ - -# 或,安装关键信息抽取SER+RE服务模块: -hub install deploy/hubserving/kie_ser_re/ -``` - -* 在Windows环境下(文件夹的分隔符为`\`),安装示例如下: -```shell -# 安装检测服务模块: -hub install deploy\hubserving\ocr_det\ - -# 或,安装分类服务模块: -hub install deploy\hubserving\ocr_cls\ - -# 或,安装识别服务模块: -hub install deploy\hubserving\ocr_rec\ - -# 或,安装检测+识别串联服务模块: -hub install deploy\hubserving\ocr_system\ - -# 或,安装表格识别服务模块: -hub install deploy\hubserving\structure_table\ - -# 或,安装PP-Structure服务模块: -hub install deploy\hubserving\structure_system\ - -# 或,安装版面分析服务模块: -hub install deploy\hubserving\structure_layout\ - -# 或,安装关键信息抽取SER服务模块: -hub install deploy\hubserving\kie_ser\ - -# 或,安装关键信息抽取SER+RE服务模块: -hub install deploy\hubserving\kie_ser_re\ -``` +在Linux环境(Windows环境请将`/`替换为`\`)下,安装模块命令如下表: +| 服务模块 | 命令 | +| ------- | - | +| 检测 | `hub install deploy/hubserving/ocr_det` | +| 分类 | `hub install deploy/hubserving/ocr_cls` | +| 识别 | `hub install deploy/hubserving/ocr_rec` | +| 检测+识别串联 | `hub install deploy/hubserving/ocr_system` | +| 表格识别 | `hub install deploy/hubserving/structure_table` | +| PP-Structure | `hub install deploy/hubserving/structure_system` | +| 版面分析 | `hub install deploy/hubserving/structure_layout` | +| 关键信息抽取SER | `hub install deploy/hubserving/kie_ser` | +| 关键信息抽取SER+RE | `hub install deploy/hubserving/kie_ser_re` | ### 2.4 启动服务 #### 2.4.1. 命令行命令启动(仅支持CPU) -**启动命令:** -```shell -$ hub serving start --modules [Module1==Version1, Module2==Version2, ...] \ - --port XXXX \ - --use_multiprocess \ - --workers \ +**启动命令:** +```bash +hub serving start --modules Module1==Version1, Module2==Version2, ... \ + --port 8866 \ + --use_multiprocess \ + --workers \ ``` -**参数:** - -|参数|用途| -|---|---| -|--modules/-m|PaddleHub Serving预安装模型,以多个Module==Version键值对的形式列出
*`当不指定Version时,默认选择最新版本`*| -|--port/-p|服务端口,默认为8866| -|--use_multiprocess|是否启用并发方式,默认为单进程方式,推荐多核CPU机器使用此方式
*`Windows操作系统只支持单进程方式`*| -|--workers|在并发方式下指定的并发任务数,默认为`2*cpu_count-1`,其中`cpu_count`为CPU核数| - -如启动串联服务: ```hub serving start -m ocr_system``` +**参数:** +|参数|用途| +|---|---| +|`--modules`/`-m`|PaddleHub Serving预安装模型,以多个Module==Version键值对的形式列出
**当不指定Version时,默认选择最新版本**| +|`--port`/`-p`|服务端口,默认为8866| +|`--use_multiprocess`|是否启用并发方式,默认为单进程方式,推荐多核CPU机器使用此方式
**Windows操作系统只支持单进程方式**| +|`--workers`|在并发方式下指定的并发任务数,默认为`2*cpu_count-1`,其中`cpu_count`为CPU核数| + +如启动串联服务: +```bash +hub serving start -m ocr_system +``` 这样就完成了一个服务化API的部署,使用默认端口号8866。 #### 2.4.2 配置文件启动(支持CPU、GPU) -**启动命令:** -```hub serving start -c config.json``` +**启动命令:** +```bash +hub serving start -c config.json +``` 其中,`config.json`格式如下: -```python +```json { "modules_info": { "ocr_system": { @@ -182,48 +139,59 @@ $ hub serving start --modules [Module1==Version1, Module2==Version2, ...] \ } ``` -- `init_args`中的可配参数与`module.py`中的`_initialize`函数接口一致。其中,**当`use_gpu`为`true`时,表示使用GPU启动服务**。 +- `init_args`中的可配参数与`module.py`中的`_initialize`函数接口一致。 + + **当`use_gpu`为`true`时,表示使用GPU启动服务。** - `predict_args`中的可配参数与`module.py`中的`predict`函数接口一致。 -**注意:** +**注意:** - 使用配置文件启动服务时,其他参数会被忽略。 -- 如果使用GPU预测(即,`use_gpu`置为`true`),则需要在启动服务之前,设置CUDA_VISIBLE_DEVICES环境变量,如:```export CUDA_VISIBLE_DEVICES=0```,否则不用设置。 +- 如果使用GPU预测(即,`use_gpu`置为`true`),则需要在启动服务之前,设置CUDA_VISIBLE_DEVICES环境变量,如: + ```bash + export CUDA_VISIBLE_DEVICES=0 + ``` - **`use_gpu`不可与`use_multiprocess`同时为`true`**。 -如,使用GPU 3号卡启动串联服务: -```shell +如,使用GPU 3号卡启动串联服务: +```bash export CUDA_VISIBLE_DEVICES=3 hub serving start -c deploy/hubserving/ocr_system/config.json ``` ## 3. 
发送预测请求 -配置好服务端,可使用以下命令发送预测请求,获取预测结果: - -```python tools/test_hubserving.py --server_url=server_url --image_dir=image_path``` - -需要给脚本传递2个参数: -- **server_url**:服务地址,格式为 -`http://[ip_address]:[port]/predict/[module_name]` -例如,如果使用配置文件启动分类,检测、识别,检测+分类+识别3阶段,表格识别和PP-Structure服务,那么发送请求的url将分别是: -`http://127.0.0.1:8865/predict/ocr_det` -`http://127.0.0.1:8866/predict/ocr_cls` -`http://127.0.0.1:8867/predict/ocr_rec` -`http://127.0.0.1:8868/predict/ocr_system` -`http://127.0.0.1:8869/predict/structure_table` -`http://127.0.0.1:8870/predict/structure_system` -`http://127.0.0.1:8870/predict/structure_layout` -`http://127.0.0.1:8871/predict/kie_ser` -`http://127.0.0.1:8872/predict/kie_ser_re` -- **image_dir**:测试图像路径,可以是单张图片路径,也可以是图像集合目录路径 -- **visualize**:是否可视化结果,默认为False -- **output**:可视化结果保存路径,默认为`./hubserving_result` - -访问示例: -```python tools/test_hubserving.py --server_url=http://127.0.0.1:8868/predict/ocr_system --image_dir=./doc/imgs/ --visualize=false``` +配置好服务端,可使用以下命令发送预测请求,获取预测结果: +```bash +python tools/test_hubserving.py --server_url=server_url --image_dir=image_path +``` + +需要给脚本传递2个参数: +- `server_url`:服务地址,格式为`http://[ip_address]:[port]/predict/[module_name]` + + 例如,如果使用配置文件启动分类,检测、识别,检测+分类+识别3阶段,表格识别和PP-Structure服务 + + 并为每个服务修改了port,那么发送请求的url将分别是: + ``` + http://127.0.0.1:8865/predict/ocr_det + http://127.0.0.1:8866/predict/ocr_cls + http://127.0.0.1:8867/predict/ocr_rec + http://127.0.0.1:8868/predict/ocr_system + http://127.0.0.1:8869/predict/structure_table + http://127.0.0.1:8870/predict/structure_system + http://127.0.0.1:8870/predict/structure_layout + http://127.0.0.1:8871/predict/kie_ser + http://127.0.0.1:8872/predict/kie_ser_re + ``` +- `image_dir`:测试图像路径,可以是单张图片路径,也可以是图像集合目录路径 +- `visualize`:是否可视化结果,默认为False +- `output`:可视化结果保存路径,默认为`./hubserving_result` + +访问示例: +```bash +python tools/test_hubserving.py --server_url=http://127.0.0.1:8868/predict/ocr_system --image_dir=./doc/imgs/ --visualize=false +``` ## 4. 
返回结果格式说明 返回结果为列表(list),列表中的每一项为词典(dict),词典一共可能包含3种字段,信息如下: - |字段名称|数据类型|意义| |---|---|---| |angle|str|文本角度| @@ -231,41 +199,52 @@ hub serving start -c deploy/hubserving/ocr_system/config.json |confidence|float| 文本识别置信度或文本角度分类置信度| |text_region|list|文本位置坐标| |html|str|表格的html字符串| -|regions|list|版面分析+表格识别+OCR的结果,每一项为一个list,包含表示区域坐标的`bbox`,区域类型的`type`和区域结果的`res`三个字段| +|regions|list|版面分析+表格识别+OCR的结果,每一项为一个list
包含表示区域坐标的`bbox`,区域类型的`type`和区域结果的`res`三个字段| |layout|list|版面分析的结果,每一项一个dict,包含版面区域坐标的`bbox`,区域类型的`label`| 不同模块返回的字段不同,如,文本识别服务模块返回结果不含`text_region`字段,具体信息如下: - -| 字段名/模块名 | ocr_det | ocr_cls | ocr_rec | ocr_system | structure_table | structure_system | Structure_layout | kie_ser | kie_re | -| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | -|angle| | ✔ | | ✔ | ||| -|text| | |✔|✔| | ✔ | | ✔ | ✔ | -|confidence| |✔ |✔| | | ✔| |✔ | ✔ | -|text_region| ✔| | |✔ | | ✔| |✔ | ✔ | -|html| | | | |✔ |✔||| | -|regions| | | | |✔ |✔ | || | -|layout| | | | | | | ✔ || | -|ser_res| | | | | | | | ✔ | | -|re_res| | | | | | | | | ✔ | - +|字段名/模块名 |ocr_det |ocr_cls |ocr_rec |ocr_system |structure_table |structure_system |structure_layout |kie_ser |kie_re | +|--- |--- |--- |--- |--- |--- |--- |--- |--- |--- | +|angle | |✔ | |✔ | | | | +|text | | |✔ |✔ | |✔ | |✔ |✔ | +|confidence | |✔ |✔ |✔ | |✔ | |✔ |✔ | +|text_region |✔ | | |✔ | |✔ | |✔ |✔ | +|html | | | | |✔ |✔ | | | | +|regions | | | | |✔ |✔ | | | | +|layout | | | | | | |✔ | | | +|ser_res | | | | | | | |✔ | | +|re_res | | | | | | | | |✔ | **说明:** 如果需要增加、删除、修改返回字段,可在相应模块的`module.py`文件中进行修改,完整流程参考下一节自定义修改服务模块。 ## 5. 自定义修改服务模块 -如果需要修改服务逻辑,你一般需要操作以下步骤(以修改`ocr_system`为例): - -- 1、 停止服务 -```hub serving stop --port/-p XXXX``` - -- 2、 到相应的`module.py`和`params.py`等文件中根据实际需求修改代码。 -例如,如果需要替换部署服务所用模型,则需要到`params.py`中修改模型路径参数`det_model_dir`和`rec_model_dir`,如果需要关闭文本方向分类器,则将参数`use_angle_cls`置为`False`,当然,同时可能还需要修改其他相关参数,请根据实际情况修改调试。 **强烈建议修改后先直接运行`module.py`调试,能正确运行预测后再启动服务测试。** -**注意** PPOCR-v3识别模型使用的图片输入shape为`3,48,320`,因此需要修改`params.py`中的`cfg.rec_image_shape = "3, 48, 320"`,如果不使用PPOCR-v3识别模型,则无需修改该参数。 - -- 3、 卸载旧服务包 -```hub uninstall ocr_system``` - -- 4、 安装修改后的新服务包 -```hub install deploy/hubserving/ocr_system/``` - -- 5、重新启动服务 -```hub serving start -m ocr_system``` +如果需要修改服务逻辑,一般需要操作以下步骤(以修改`deploy/hubserving/ocr_system`为例): + +1. 停止服务: + ```bash + hub serving stop --port/-p XXXX + ``` +2. 
到`deploy/hubserving/ocr_system`下的`module.py`和`params.py`等文件中根据实际需求修改代码。 + + 例如,如果需要替换部署服务所用模型,则需要到`params.py`中修改模型路径参数`det_model_dir`和`rec_model_dir`,如果需要关闭文本方向分类器,则将参数`use_angle_cls`置为`False` + + 当然,同时可能还需要修改其他相关参数,请根据实际情况修改调试。 + + **强烈建议修改后先直接运行`module.py`调试,能正确运行预测后再启动服务测试。** + + **注意:** PPOCR-v3识别模型使用的图片输入shape为`3,48,320`,因此需要修改`params.py`中的`cfg.rec_image_shape = "3, 48, 320"`,如果不使用PPOCR-v3识别模型,则无需修改该参数。 +3. (可选)如果想要重命名模块需要更改`module.py`文件中的以下行: + - [`from deploy.hubserving.ocr_system.params import read_params`中的`ocr_system`](https://github.com/PaddlePaddle/PaddleOCR/blob/a923f35de57b5e378f8dd16e54d0a3e4f51267fd/deploy/hubserving/ocr_system/module.py#L35) + - [`name="ocr_system",`中的`ocr_system`](https://github.com/PaddlePaddle/PaddleOCR/blob/a923f35de57b5e378f8dd16e54d0a3e4f51267fd/deploy/hubserving/ocr_system/module.py#L39) +4. (可选)可能需要删除`__pycache__`目录以强制刷新CPython缓存: + ```bash + find deploy/hubserving/ocr_system -name '__pycache__' -exec rm -r {} \; + ``` +5. 安装修改后的新服务包: + ```bash + hub install deploy/hubserving/ocr_system + ``` +6. 重新启动服务: + ```bash + hub serving start -m ocr_system + ``` diff --git a/deploy/hubserving/readme_en.md b/deploy/hubserving/readme_en.md index 613f0ed4..034e2786 100755 --- a/deploy/hubserving/readme_en.md +++ b/deploy/hubserving/readme_en.md @@ -3,24 +3,23 @@ English | [简体中文](readme.md) - [Service deployment based on PaddleHub Serving](#service-deployment-based-on-paddlehub-serving) - [1. Update](#1-update) - [2. 
Quick start service](#2-quick-start-service) - - [2.1 Prepare the environment](#21-prepare-the-environment) + - [2.1 Install PaddleHub](#21-install-paddlehub) - [2.2 Download inference model](#22-download-inference-model) - [2.3 Install Service Module](#23-install-service-module) - [2.4 Start service](#24-start-service) - [2.4.1 Start with command line parameters (CPU only)](#241-start-with-command-line-parameters-cpu-only) - - [2.4.2 Start with configuration file(CPU、GPU)](#242-start-with-configuration-filecpugpu) + - [2.4.2 Start with configuration file(CPU and GPU)](#242-start-with-configuration-filecpu-and-gpu) - [3. Send prediction requests](#3-send-prediction-requests) - [4. Returned result format](#4-returned-result-format) - - [5. User defined service module modification](#5-user-defined-service-module-modification) - + - [5. User-defined service module modification](#5-user-defined-service-module-modification) PaddleOCR provides 2 service deployment methods: -- Based on **PaddleHub Serving**: Code path is "`./deploy/hubserving`". Please follow this tutorial. -- Based on **PaddleServing**: Code path is "`./deploy/pdserving`". Please refer to the [tutorial](../../deploy/pdserving/README.md) for usage. +- Based on **PaddleHub Serving**: Code path is `./deploy/hubserving`. Please follow this tutorial. +- Based on **PaddleServing**: Code path is `./deploy/pdserving`. Please refer to the [tutorial](../../deploy/pdserving/README.md) for usage. -# Service deployment based on PaddleHub Serving +# Service deployment based on PaddleHub Serving -The hubserving service deployment directory includes seven service packages: text detection, text angle class, text recognition, text detection+text angle class+text recognition three-stage series connection, layout analysis, table recognition and PP-Structure. Please select the corresponding service package to install and start service according to your needs. 
The directory is as follows: +The hubserving service deployment directory includes seven service packages: text detection, text angle class, text recognition, text detection+text angle class+text recognition three-stage series connection, layout analysis, table recognition, and PP-Structure. Please select the corresponding service package to install and start the service according to your needs. The directory is as follows: ``` deploy/hubserving/ └─ ocr_det text detection module service package @@ -34,13 +33,13 @@ deploy/hubserving/ └─ kie_ser_re KIE(SER+RE) service package ``` -Each service pack contains 3 files. Take the 2-stage series connection service package as an example, the directory is as follows: +Each service pack contains 3 files. Take the 2-stage series connection service package as an example, the directory is as follows: ``` deploy/hubserving/ocr_system/ └─ __init__.py Empty file, required └─ config.json Configuration file, optional, passed in as a parameter when using configuration to start the service └─ module.py Main module file, required, contains the complete logic of the service - └─ params.py Parameter file, required, including parameters such as model path, pre- and post-processing parameters + └─ params.py Parameter file, required, including parameters such as model path, pre and post-processing parameters ``` ## 1. Update @@ -49,124 +48,76 @@ deploy/hubserving/ocr_system/ * 2022.03.30 add PP-Structure and table recognition services. * 2022.05.05 add PP-OCRv3 text detection and recognition services. - ## 2. Quick start service The following steps take the 2-stage series service as an example. If only the detection service or recognition service is needed, replace the corresponding file path. 
-### 2.1 Prepare the environment -```shell -# Install paddlehub -# python>3.6.2 is required bt paddlehub -pip3 install paddlehub==2.1.0 --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple +### 2.1 Install PaddleHub +```bash +pip3 install paddlehub==2.1.0 --upgrade ``` ### 2.2 Download inference model -Before installing the service module, you need to prepare the inference model and put it in the correct path. By default, the PP-OCRv3 models are used, and the default model path is: -``` -text detection model: ./inference/ch_PP-OCRv3_det_infer/ -text recognition model: ./inference/ch_PP-OCRv3_rec_infer/ -text angle classifier: ./inference/ch_ppocr_mobile_v2.0_cls_infer/ -layout parse model: ./inference/picodet_lcnet_x1_0_fgd_layout_infer/ -tanle recognition: ./inference/ch_ppstructure_mobile_v2.0_SLANet_infer/ -KIE(SER): ./inference/ser_vi_layoutxlm_xfund_infer/ -KIE(SER+RE): ./inference/re_vi_layoutxlm_xfund_infer/ -``` - -**The model path can be found and modified in `params.py`.** More models provided by PaddleOCR can be obtained from the [model library](../../doc/doc_en/models_list_en.md). You can also use models trained by yourself. +Before installing the service module, you need to prepare the inference model and put it in the correct path. 
By default, the PP-OCRv3 models are used, and the default model path is: +| Model | Path | +| ------- | - | +| text detection model | ./inference/ch_PP-OCRv3_det_infer/ | +| text recognition model | ./inference/ch_PP-OCRv3_rec_infer/ | +| text angle classifier | ./inference/ch_ppocr_mobile_v2.0_cls_infer/ | +| layout parse model | ./inference/picodet_lcnet_x1_0_fgd_layout_infer/ | +| table recognition | ./inference/ch_ppstructure_mobile_v2.0_SLANet_infer/ | +| KIE(SER) | ./inference/ser_vi_layoutxlm_xfund_infer/ | +| KIE(SER+RE) | ./inference/re_vi_layoutxlm_xfund_infer/ | + +**The model path can be found and modified in `params.py`.** +More models provided by PaddleOCR can be obtained from the [model library](../../doc/doc_en/models_list_en.md). You can also use models trained by yourself. ### 2.3 Install Service Module PaddleOCR provides 5 kinds of service modules, install the required modules according to your needs. -* On Linux platform, the examples are as follows. -```shell -# Install the text detection service module: -hub install deploy/hubserving/ocr_det/ - -# Or, install the text angle class service module: -hub install deploy/hubserving/ocr_cls/ - -# Or, install the text recognition service module: -hub install deploy/hubserving/ocr_rec/ - -# Or, install the 2-stage series service module: -hub install deploy/hubserving/ocr_system/ - -# Or install table recognition service module -hub install deploy/hubserving/structure_table/ - -# Or install PP-Structure service module -hub install deploy/hubserving/structure_system/ - -# Or install KIE(SER) service module -hub install deploy/hubserving/kie_ser/ - -# Or install KIE(SER+RE) service module -hub install deploy/hubserving/kie_ser_re/ -``` - -* On Windows platform, the examples are as follows. 
-```shell -# Install the detection service module: -hub install deploy\hubserving\ocr_det\ - -# Or, install the angle class service module: -hub install deploy\hubserving\ocr_cls\ - -# Or, install the recognition service module: -hub install deploy\hubserving\ocr_rec\ - -# Or, install the 2-stage series service module: -hub install deploy\hubserving\ocr_system\ - -# Or install table recognition service module -hub install deploy/hubserving/structure_table/ - -# Or install PP-Structure service module -hub install deploy\hubserving\structure_system\ - -# Or install layout analysis service module -hub install deploy\hubserving\structure_layout\ - -# Or install KIE(SER) service module -hub install deploy\hubserving\kie_ser\ - -# Or install KIE(SER+RE) service module -hub install deploy\hubserving\kie_ser_re\ -``` +* On the Linux platform(replace `/` with `\` if using Windows), the examples are as the following table: +| Service model | Command | +| text detection | `hub install deploy/hubserving/ocr_det` | +| text angle class: | `hub install deploy/hubserving/ocr_cls` | +| text recognition: | `hub install deploy/hubserving/ocr_rec` | +| 2-stage series: | `hub install deploy/hubserving/ocr_system` | +| table recognition | `hub install deploy/hubserving/structure_table` | +| PP-Structure | `hub install deploy/hubserving/structure_system` | +| KIE(SER) | `hub install deploy/hubserving/kie_ser` | +| KIE(SER+RE) | `hub install deploy/hubserving/kie_ser_re` | ### 2.4 Start service #### 2.4.1 Start with command line parameters (CPU only) +**start command:** +```bash +hub serving start --modules Module1==Version1, Module2==Version2, ... \ + --port 8866 \ + --use_multiprocess \ + --workers \ +``` + +**Parameters:** +|parameters|usage| +|---|---| +|`--modules`/`-m`|PaddleHub Serving pre-installed model, listed in the form of multiple Module==Version key-value pairs
**When Version is not specified, the latest version is selected by default**| +|`--port`/`-p`|Service port, default is 8866| +|`--use_multiprocess`|Enable concurrent mode, by default using the single-process mode, this mode is recommended for multi-core CPU machines
**Windows operating system only supports single-process mode**| +|`--workers`|The number of concurrent tasks specified in concurrent mode, the default is `2*cpu_count-1`, where `cpu_count` is the number of CPU cores| -**start command:** -```shell -$ hub serving start --modules [Module1==Version1, Module2==Version2, ...] \ - --port XXXX \ - --use_multiprocess \ - --workers \ -``` -**parameters:** - -|parameters|usage| -|---|---| -|--modules/-m|PaddleHub Serving pre-installed model, listed in the form of multiple Module==Version key-value pairs
*`When Version is not specified, the latest version is selected by default`*| -|--port/-p|Service port, default is 8866| -|--use_multiprocess|Enable concurrent mode, the default is single-process mode, this mode is recommended for multi-core CPU machines
*`Windows operating system only supports single-process mode`*| -|--workers|The number of concurrent tasks specified in concurrent mode, the default is `2*cpu_count-1`, where `cpu_count` is the number of CPU cores| - -For example, start the 2-stage series service: -```shell +For example, start the 2-stage series service: +```bash hub serving start -m ocr_system -``` +``` -This completes the deployment of a service API, using the default port number 8866. +This completes the deployment of a service API, using the default port number 8866. -#### 2.4.2 Start with configuration file(CPU、GPU) -**start command:** -```shell +#### 2.4.2 Start with configuration file(CPU and GPU) +**start command:** +```bash hub serving start --config/-c config.json -``` -Wherein, the format of `config.json` is as follows: -```python +``` + +In which the format of `config.json` is as follows: +```json { "modules_info": { "ocr_system": { @@ -183,51 +134,61 @@ Wherein, the format of `config.json` is as follows: "workers": 2 } ``` -- The configurable parameters in `init_args` are consistent with the `_initialize` function interface in `module.py`. Among them, **when `use_gpu` is `true`, it means that the GPU is used to start the service**. +- The configurable parameters in `init_args` are consistent with the `_initialize` function interface in `module.py`. + + **When `use_gpu` is `true`, it means that the GPU is used to start the service**. - The configurable parameters in `predict_args` are consistent with the `predict` function interface in `module.py`. -**Note:** -- When using the configuration file to start the service, other parameters will be ignored. -- If you use GPU prediction (that is, `use_gpu` is set to `true`), you need to set the environment variable CUDA_VISIBLE_DEVICES before starting the service, such as: ```export CUDA_VISIBLE_DEVICES=0```, otherwise you do not need to set it. 
-- **`use_gpu` and `use_multiprocess` cannot be `true` at the same time.** + **Note:** + - When using the configuration file to start the service, other parameters will be ignored. + - If you use GPU prediction (that is, `use_gpu` is set to `true`), you need to set the environment variable CUDA_VISIBLE_DEVICES before starting the service, such as: + ```bash + export CUDA_VISIBLE_DEVICES=0 + ``` + - **`use_gpu` and `use_multiprocess` cannot be `true` at the same time.** For example, use GPU card No. 3 to start the 2-stage series service: -```shell +```bash export CUDA_VISIBLE_DEVICES=3 hub serving start -c deploy/hubserving/ocr_system/config.json -``` +``` ## 3. Send prediction requests -After the service starts, you can use the following command to send a prediction request to obtain the prediction result: -```shell +After the service starts, you can use the following command to send a prediction request to obtain the prediction result: +```bash python tools/test_hubserving.py --server_url=server_url --image_dir=image_path -``` +``` Two parameters need to be passed to the script: -- **server_url**:service address,format of which is -`http://[ip_address]:[port]/predict/[module_name]` -For example, if using the configuration file to start the text angle classification, text detection, text recognition, detection+classification+recognition 3 stages, table recognition and PP-Structure service, then the `server_url` to send the request will be: - -`http://127.0.0.1:8865/predict/ocr_det` -`http://127.0.0.1:8866/predict/ocr_cls` -`http://127.0.0.1:8867/predict/ocr_rec` -`http://127.0.0.1:8868/predict/ocr_system` -`http://127.0.0.1:8869/predict/structure_table` -`http://127.0.0.1:8870/predict/structure_system` -`http://127.0.0.1:8870/predict/structure_layout` -`http://127.0.0.1:8871/predict/kie_ser` -`http://127.0.0.1:8872/predict/kie_ser_re` -- **image_dir**:Test image path, can be a single image path or an image directory path -- **visualize**:Whether to visualize the 
results, the default value is False -- **output**:The floder to save Visualization result, default value is `./hubserving_result` - -**Eg.** -```shell +- **server_url**:service address, the format of which is +`http://[ip_address]:[port]/predict/[module_name]` + + For example, if using the configuration file to start the text angle classification, text detection, text recognition, detection+classification+recognition 3 stages, table recognition and PP-Structure service, + + also modified the port for each service, then the `server_url` to send the request will be: + + ``` + http://127.0.0.1:8865/predict/ocr_det + http://127.0.0.1:8866/predict/ocr_cls + http://127.0.0.1:8867/predict/ocr_rec + http://127.0.0.1:8868/predict/ocr_system + http://127.0.0.1:8869/predict/structure_table + http://127.0.0.1:8870/predict/structure_system + http://127.0.0.1:8870/predict/structure_layout + http://127.0.0.1:8871/predict/kie_ser + http://127.0.0.1:8872/predict/kie_ser_re + ``` +- **image_dir**:Test image path, which can be a single image path or an image directory path +- **visualize**:Whether to visualize the results, the default value is False +- **output**:The folder to save the Visualization result, the default value is `./hubserving_result` + +Example: +```bash python tools/test_hubserving.py --server_url=http://127.0.0.1:8868/predict/ocr_system --image_dir=./doc/imgs/ --visualize=false` ``` ## 4. Returned result format -The returned result is a list. Each item in the list is a dict. The dict may contain three fields. The information is as follows: +The returned result is a list. Each item in the list is a dictionary which may contain three fields. The information is as follows: |field name|data type|description| |----|----|----| @@ -235,45 +196,54 @@ The returned result is a list. Each item in the list is a dict. 
The dict may con |text|str|text content| |confidence|float|text recognition confidence| |text_region|list|text location coordinates| -|html|str|table html str| -|regions|list|The result of layout analysis + table recognition + OCR, each item is a list, including `bbox` indicating area coordinates, `type` of area type and `res` of area results| +|html|str|table HTML string| +|regions|list|The result of layout analysis + table recognition + OCR, each item is a list
including `bbox` indicating area coordinates, `type` of area type and `res` of area results| |layout|list|The result of layout analysis, each item is a dict, including `bbox` indicating area coordinates, `label` of area type| -The fields returned by different modules are different. For example, the results returned by the text recognition service module do not contain `text_region`. The details are as follows: +The fields returned by different modules are different. For example, the results returned by the text recognition service module do not contain `text_region`. The detailed table is as follows: -| field name/module name | ocr_det | ocr_cls | ocr_rec | ocr_system | structure_table | structure_system | structure_layout | kie_ser | kie_re | -| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | -|angle| | ✔ | | ✔ | ||| -|text| | |✔|✔| | ✔ | | ✔ | ✔ | -|confidence| |✔ |✔| | | ✔| |✔ | ✔ | -|text_region| ✔| | |✔ | | ✔| |✔ | ✔ | -|html| | | | |✔ |✔||| | -|regions| | | | |✔ |✔ | || | -|layout| | | | | | | ✔ || | -|ser_res| | | | | | | | ✔ | | -|re_res| | | | | | | | | ✔ | +|field name/module name |ocr_det |ocr_cls |ocr_rec |ocr_system |structure_table |structure_system |structure_layout |kie_ser |kie_re | +|--- |--- |--- |--- |--- |--- |--- |--- |--- |--- | +|angle | |✔ | |✔ | | | | | | +|text | | |✔ |✔ | |✔ | |✔ |✔ | +|confidence | |✔ |✔ |✔ | |✔ | |✔ |✔ | +|text_region |✔ | | |✔ | |✔ | |✔ |✔ | +|html | | | | |✔ |✔ | | | | +|regions | | | | |✔ |✔ | | | | +|layout | | | | | | |✔ | | | +|ser_res | | | | | | | |✔ | | +|re_res | | | | | | | | |✔ | -**Note:** If you need to add, delete or modify the returned fields, you can modify the file `module.py` of the corresponding module. For the complete process, refer to the user-defined modification service module in the next section. +**Note:** If you need to add, delete or modify the returned fields, you can modify the file `module.py` of the corresponding module.
For the complete process, refer to the user-defined modification service module in the next section. -## 5. User defined service module modification -If you need to modify the service logic, the following steps are generally required (take the modification of `ocr_system` for example): +## 5. User-defined service module modification +If you need to modify the service logic, the following steps are generally required (take the modification of `deploy/hubserving/ocr_system` for example): -- 1. Stop service -```shell +1. Stop service: +```bash hub serving stop --port/-p XXXX ``` -- 2. Modify the code in the corresponding files, like `module.py` and `params.py`, according to the actual needs. -For example, if you need to replace the model used by the deployed service, you need to modify model path parameters `det_model_dir` and `rec_model_dir` in `params.py`. If you want to turn off the text direction classifier, set the parameter `use_angle_cls` to `False`. Of course, other related parameters may need to be modified at the same time. Please modify and debug according to the actual situation. It is suggested to run `module.py` directly for debugging after modification before starting the service test. -**Note** The image input shape used by the PPOCR-v3 recognition model is `3, 48, 320`, so you need to modify `cfg.rec_image_shape = "3, 48, 320"` in `params.py`, if you do not use the PPOCR-v3 recognition model, then there is no need to modify this parameter. -- 3. Uninstall old service module -```shell -hub uninstall ocr_system -``` -- 4. Install modified service module -```shell -hub install deploy/hubserving/ocr_system/ -``` -- 5. Restart service -```shell -hub serving start -m ocr_system -``` +2. Modify the code in the corresponding files under `deploy/hubserving/ocr_system`, such as `module.py` and `params.py`, to your actual needs. 
+ + For example, if you need to replace the model used by the deployed service, you need to modify model path parameters `det_model_dir` and `rec_model_dir` in `params.py`. If you want to turn off the text direction classifier, set the parameter `use_angle_cls` to `False`. + + Of course, other related parameters may need to be modified at the same time. Please modify and debug according to the actual situation. + + **It is suggested to run `module.py` directly for debugging after modification before starting the service test.** + + **Note** The image input shape used by the PPOCR-v3 recognition model is `3, 48, 320`, so you need to modify `cfg.rec_image_shape = "3, 48, 320"` in `params.py`, if you do not use the PPOCR-v3 recognition model, then there is no need to modify this parameter. +3. (Optional) If you want to rename the module, the following lines should be modified: + - [`ocr_system` within `from deploy.hubserving.ocr_system.params import read_params`](https://github.com/PaddlePaddle/PaddleOCR/blob/a923f35de57b5e378f8dd16e54d0a3e4f51267fd/deploy/hubserving/ocr_system/module.py#L35) + - [`ocr_system` within `name="ocr_system",`](https://github.com/PaddlePaddle/PaddleOCR/blob/a923f35de57b5e378f8dd16e54d0a3e4f51267fd/deploy/hubserving/ocr_system/module.py#L39) +4. (Optional) It may require you to delete the directory `__pycache__` to force flush build cache of CPython: + ```bash + find deploy/hubserving/ocr_system -name '__pycache__' -exec rm -r {} \; + ``` +5. Install modified service module: + ```bash + hub install deploy/hubserving/ocr_system/ + ``` +6. 
Restart service: + ```bash + hub serving start -m ocr_system + ``` diff --git a/deploy/pdserving/README_CN.md b/deploy/pdserving/README_CN.md index ab05b766..be314b9e 100644 --- a/deploy/pdserving/README_CN.md +++ b/deploy/pdserving/README_CN.md @@ -106,13 +106,13 @@ python3 -m paddle_serving_client.convert --dirname ./ch_PP-OCRv3_rec_infer/ \ 检测模型转换完成后,会在当前文件夹多出`ppocr_det_v3_serving` 和`ppocr_det_v3_client`的文件夹,具备如下格式: ``` |- ppocr_det_v3_serving/ - |- __model__ + |- __model__ |- __params__ - |- serving_server_conf.prototxt + |- serving_server_conf.prototxt |- serving_server_conf.stream.prototxt |- ppocr_det_v3_client - |- serving_client_conf.prototxt + |- serving_client_conf.prototxt |- serving_client_conf.stream.prototxt ``` @@ -232,6 +232,7 @@ cp -rf general_detection_op.cpp Serving/core/general-server/op # 启动服务,运行日志保存在log.txt python3 -m paddle_serving_server.serve --model ppocr_det_v3_serving ppocr_rec_v3_serving --op GeneralDetectionOp GeneralInferOp --port 8181 &>log.txt & ``` + 成功启动服务后,log.txt中会打印类似如下日志 ![](./imgs/start_server.png) diff --git a/doc/doc_ch/models_list.md b/doc/doc_ch/models_list.md index c6cbd687..8082fbc3 100644 --- a/doc/doc_ch/models_list.md +++ b/doc/doc_ch/models_list.md @@ -101,6 +101,7 @@ PaddleOCR提供的可下载模型包括`推理模型`、`训练模型`、`预训 |en_number_mobile_slim_v2.0_rec|slim裁剪量化版超轻量模型,支持英文、数字识别|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)| 2.7M | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/en_number_mobile_v2.0_rec_slim_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/en_number_mobile_v2.0_rec_slim_train.tar) | |en_number_mobile_v2.0_rec|原始超轻量模型,支持英文、数字识别|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)|2.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_train.tar) | +**注意:** 
所有英文识别模型的字典文件均为`ppocr/utils/en_dict.txt` ### 2.3 多语言识别模型(更多语言持续更新中...) @@ -146,3 +147,4 @@ Paddle-Lite 是一个高性能、轻量级、灵活性强且易于扩展的深 |PP-OCRv2(slim)|蒸馏版超轻量中文OCR移动端模型|4.9M|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_opt.nb)|v2.9| |V2.0|ppocr_v2.0超轻量中文OCR移动端模型|7.8M|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_det_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_rec_opt.nb)|v2.9| |V2.0(slim)|ppocr_v2.0超轻量中文OCR移动端模型|3.3M|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_det_slim_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_rec_slim_opt.nb)|v2.9| + diff --git a/doc/doc_en/models_list_en.md b/doc/doc_en/models_list_en.md index 168d63d3..a56f72d8 100644 --- a/doc/doc_en/models_list_en.md +++ b/doc/doc_en/models_list_en.md @@ -1,8 +1,8 @@ # OCR Model List(V3, updated on 2022.4.28) > **Note** -> 1. Compared with the model v2, the 3rd version of the detection model has a improvement in accuracy, and the 2.1 version of the recognition model has optimizations in accuracy and speed with CPU. +> 1. Compared with model v2, the 3rd version of the detection model has an improvement in accuracy, and the 2.1 version of the recognition model has optimizations in accuracy and speed with CPU. > 2. Compared with [models 1.1](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/models_list_en.md), which are trained with static graph programming paradigm, models 2.0 or higher are the dynamic graph trained version and achieve close performance. -> 3. 
All models in this tutorial are all ppocr-series models, for more introduction of algorithms and models based on public dataset, you can refer to [algorithm overview tutorial](./algorithm_overview_en.md). +> 3. All models in this tutorial are from the PaddleOCR series. For an introduction to more algorithms and models based on public datasets, you can refer to the [algorithm overview tutorial](./algorithm_overview_en.md). - [OCR Model List(V3, updated on 2022.4.28)]() - [1. Text Detection Model](#1-text-detection-model) @@ -16,15 +16,15 @@ - [3. Text Angle Classification Model](#3-text-angle-classification-model) - [4. Paddle-Lite Model](#4-paddle-lite-model) -The downloadable models provided by PaddleOCR include `inference model`, `trained model`, `pre-trained model` and `nb model`. The differences between the models are as follows: +The downloadable models provided by PaddleOCR include the `inference model`, `trained model`, `pre-trained model` and `nb model`. The differences between the models are as follows: |model type|model format|description| |--- | --- | --- | |inference model|inference.pdmodel、inference.pdiparams|Used for inference based on Paddle inference engine,[detail](./inference_ppocr_en.md)| -|trained model, pre-trained model|\*.pdparams、\*.pdopt、\*.states |The checkpoints model saved in the training process, which stores the parameters of the model, mostly used for model evaluation and continuous training.| +|trained model, pre-trained model|\*.pdparams、\*.pdopt、\*.states |The checkpoints model saved in the training process, which stores the parameters of the model, is mostly used for model evaluation and continuous training.| |nb model|\*.nb| Model optimized by Paddle-Lite, which is suitable for mobile-side deployment scenarios (Paddle-Lite is needed for nb model deployment). | -Relationship of the above models is as follows. +The relationship of the above models is as follows.
![](../imgs_en/model_prod_flow_en.png) @@ -51,10 +51,10 @@ Relationship of the above models is as follows. |model name|description|config|model size|download| | --- | --- | --- | --- | --- | -|en_PP-OCRv3_det_slim | [New] Slim qunatization with distillation lightweight detection model, supporting English | [ch_PP-OCRv3_det_cml.yml](../../configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml) | 1.1M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_slim_distill_train.tar) / [nb model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_slim_infer.nb) | +|en_PP-OCRv3_det_slim | [New] Slim quantization with distillation lightweight detection model, supporting English | [ch_PP-OCRv3_det_cml.yml](../../configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml) | 1.1M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_slim_distill_train.tar) / [nb model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_slim_infer.nb) | |en_PP-OCRv3_det | [New] Original lightweight detection model, supporting English |[ch_PP-OCRv3_det_cml.yml](../../configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml)| 3.8M | [inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_distill_train.tar) | -* Note: English configuration file is same as Chinese except training data, here we only provide one configuration file. +* Note: English configuration file is the same as Chinese except for training data, here we only provide one configuration file. @@ -62,10 +62,10 @@ Relationship of the above models is as follows. 
|model name|description|config|model size|download| | --- | --- | --- | --- | --- | -| ml_PP-OCRv3_det_slim | [New] Slim qunatization with distillation lightweight detection model, supporting English | [ch_PP-OCRv3_det_cml.yml](../../configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml) | 1.1M | [inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/Multilingual_PP-OCRv3_det_slim_infer.tar) / [trained model ](https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/Multilingual_PP-OCRv3_det_slim_distill_train.tar) / [nb model](https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/Multilingual_PP-OCRv3_det_slim_infer.nb) | +| ml_PP-OCRv3_det_slim | [New] Slim quantization with distillation lightweight detection model, supporting English | [ch_PP-OCRv3_det_cml.yml](../../configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml) | 1.1M | [inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/Multilingual_PP-OCRv3_det_slim_infer.tar) / [trained model ](https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/Multilingual_PP-OCRv3_det_slim_distill_train.tar) / [nb model](https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/Multilingual_PP-OCRv3_det_slim_infer.nb) | | ml_PP-OCRv3_det |[New] Original lightweight detection model, supporting English | [ch_PP-OCRv3_det_cml.yml](../../configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml)| 3.8M | [inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/Multilingual_PP-OCRv3_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/Multilingual_PP-OCRv3_det_distill_train.tar) | -* Note: English configuration file is same as Chinese except training data, here we only provide one configuration file. +* Note: English configuration file is the same as Chinese except for training data, here we only provide one configuration file. ## 2. Text Recognition Model @@ -75,27 +75,29 @@ Relationship of the above models is as follows. 
|model name|description|config|model size|download| | --- | --- | --- | --- | --- | -|ch_PP-OCRv3_rec_slim | [New] Slim qunatization with distillation lightweight model, supporting Chinese, English text recognition |[ch_PP-OCRv3_rec_distillation.yml](../../configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml)| 4.9M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_train.tar) / [nb model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.nb) | +|ch_PP-OCRv3_rec_slim | [New] Slim quantization with distillation lightweight model, supporting Chinese, English text recognition |[ch_PP-OCRv3_rec_distillation.yml](../../configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml)| 4.9M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_train.tar) / [nb model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.nb) | |ch_PP-OCRv3_rec| [New] Original lightweight model, supporting Chinese, English, multilingual text recognition |[ch_PP-OCRv3_rec_distillation.yml](../../configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml)| 12.4M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_train.tar) | -|ch_PP-OCRv2_rec_slim| Slim qunatization with distillation lightweight model, supporting Chinese, English text recognition|[ch_PP-OCRv2_rec.yml](../../configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml)| 9.0M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_quant_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_quant_train.tar) | -|ch_PP-OCRv2_rec| Original 
lightweight model, supporting Chinese, English, multilingual text recognition |[ch_PP-OCRv2_rec_distillation.yml](../../configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml)|8.5M|[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_train.tar) | +|ch_PP-OCRv2_rec_slim| Slim quantization with distillation lightweight model, supporting Chinese, English text recognition|[ch_PP-OCRv2_rec.yml](../../configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml)| 9.0M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_quant_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_quant_train.tar) | +|ch_PP-OCRv2_rec| Original lightweight model, supporting Chinese, English, and multilingual text recognition |[ch_PP-OCRv2_rec_distillation.yml](../../configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml)|8.5M|[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_train.tar) | |ch_ppocr_mobile_slim_v2.0_rec|Slim pruned and quantized lightweight model, supporting Chinese, English and number recognition|[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)| 6.0M | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_slim_train.tar) | |ch_ppocr_mobile_v2.0_rec|Original lightweight model, supporting Chinese, English and number recognition|[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)|5.2M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [trained 
model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_pre.tar) | |ch_ppocr_server_v2.0_rec|General model, supporting Chinese, English and number recognition|[rec_chinese_common_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml)|94.8M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_pre.tar) | -**Note:** The `trained model` is fine-tuned on the `pre-trained model` with real data and synthesized vertical text data, which achieved better performance in real scene. The `pre-trained model` is directly trained on the full amount of real data and synthesized data, which is more suitable for fine-tune on your own dataset. +**Note:** The `trained model` is fine-tuned on the `pre-trained model` with real data and synthesized vertical text data, which achieved better performance in real scenes. The `pre-trained model` is directly trained on the full amount of real data and synthesized data, which is more suitable for fine-tuning on your own dataset.
### 2.2 English Recognition Model |model name|description|config|model size|download| | --- | --- | --- | --- | --- | -|en_PP-OCRv3_rec_slim | [New] Slim qunatization with distillation lightweight model, supporting english, English text recognition |[en_PP-OCRv3_rec.yml](../../configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml)| 3.2M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_slim_train.tar) / [nb model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_slim_infer.nb) | -|en_PP-OCRv3_rec| [New] Original lightweight model, supporting english, English, multilingual text recognition |[en_PP-OCRv3_rec.yml](../../configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml)| 9.6M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_train.tar) | +|en_PP-OCRv3_rec_slim | [New] Slim quantization with distillation lightweight model, supporting English, English text recognition |[en_PP-OCRv3_rec.yml](../../configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml)| 3.2M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_slim_train.tar) / [nb model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_slim_infer.nb) | +|en_PP-OCRv3_rec| [New] Original lightweight model, supporting English, English, multilingual text recognition |[en_PP-OCRv3_rec.yml](../../configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml)| 9.6M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_train.tar) | |en_number_mobile_slim_v2.0_rec|Slim pruned and quantized lightweight model, supporting English and number 
recognition|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)| 2.7M | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/en_number_mobile_v2.0_rec_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/en_number_mobile_v2.0_rec_slim_train.tar) | |en_number_mobile_v2.0_rec|Original lightweight model, supporting English and number recognition|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)|2.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_train.tar) | +**Note:** Dictionary file of all English recognition models is `ppocr/utils/en_dict.txt`. + ### 2.3 Multilingual Recognition Model(Updating...) @@ -112,7 +114,7 @@ Relationship of the above models is as follows. | cyrillic_PP-OCRv3_rec | ppocr/utils/dict/cyrillic_dict.txt | Lightweight model for cyrillic recognition | [cyrillic_PP-OCRv3_rec.yml](../../configs/rec/PP-OCRv3/multi_language/cyrillic_PP-OCRv3_rec.yml) |9.6M|[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/cyrillic_PP-OCRv3_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/cyrillic_PP-OCRv3_rec_train.tar) | | devanagari_PP-OCRv3_rec | ppocr/utils/dict/devanagari_dict.txt | Lightweight model for devanagari recognition | [devanagari_PP-OCRv3_rec.yml](../../configs/rec/PP-OCRv3/multi_language/devanagari_PP-OCRv3_rec.yml) |9.9M|[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/devanagari_PP-OCRv3_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/devanagari_PP-OCRv3_rec_train.tar) | -For a complete list of languages ​​and tutorials, please refer to : [Multi-language model](./multi_languages_en.md) +For a complete list of languages ​​and tutorials, 
please refer to [Multi-language model](./multi_languages_en.md) ## 3. Text Angle Classification Model @@ -125,9 +127,9 @@ For a complete list of languages ​​and tutorials, please refer to : [Multi-l ## 4. Paddle-Lite Model -Paddle Lite is an updated version of Paddle-Mobile, an open-open source deep learning framework designed to make it easy to perform inference on mobile, embeded, and IoT devices. It can further optimize the inference model and generate `nb model` used for edge devices. It's suggested to optimize the quantization model using Paddle-Lite because `INT8` format is used for the model storage and inference. +Paddle Lite is an updated version of Paddle-Mobile, an open-source deep learning framework designed to make it easy to perform inference on mobile, embedded, and IoT devices. It can further optimize the inference model and generate the `nb model` used for edge devices. It's suggested to optimize the quantization model using Paddle-Lite because the `INT8` format is used for the model storage and inference. -This chapter lists OCR nb models with PP-OCRv2 or earlier versions. You can access to the latest nb models from the above tables. +This chapter lists OCR nb models with PP-OCRv2 or earlier versions. You can access the latest nb models from the above tables.
|Version|Introduction|Model size|Detection model|Text Direction model|Recognition model|Paddle-Lite branch| |---|---|---|---|---|---|---| diff --git a/setup.py b/setup.py index 3aa0a170..f308fddc 100644 --- a/setup.py +++ b/setup.py @@ -43,7 +43,7 @@ setup( version=VERSION, install_requires=load_requirements(['requirements.txt', 'ppstructure/recovery/requirements.txt']), license='Apache License 2.0', - description='Awesome OCR toolkits based on PaddlePaddle (8.6M ultra-lightweight pre-trained model, support training and deployment among server, mobile, embeded and IoT devices', + description='Awesome OCR toolkits based on PaddlePaddle (8.6M ultra-lightweight pre-trained model, support training and deployment among server, mobile, embedded and IoT devices', long_description=readme(), long_description_content_type='text/markdown', url='https://github.com/PaddlePaddle/PaddleOCR', -- Gitee From 795c81f1835725dbf9bf9f5a13e0c793dfd5f243 Mon Sep 17 00:00:00 2001 From: zhenliang li <37922155+shouyong@users.noreply.github.com> Date: Wed, 5 Jul 2023 16:26:32 +0800 Subject: [PATCH 090/112] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=B5=8B=E8=AF=95?= =?UTF-8?q?=E6=9C=8D=E5=8A=A1=E4=B8=AD=E5=9B=BE=E7=89=87=E8=BD=ACBase64?= =?UTF-8?q?=E7=9A=84=E5=BC=95=E7=94=A8=E5=9C=B0=E5=9D=80=E9=94=99=E8=AF=AF?= =?UTF-8?q?=E3=80=82=20(#8334)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- deploy/docker/hubserving/README_cn.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/docker/hubserving/README_cn.md b/deploy/docker/hubserving/README_cn.md index 046903c4..b695b799 100644 --- a/deploy/docker/hubserving/README_cn.md +++ b/deploy/docker/hubserving/README_cn.md @@ -42,7 +42,7 @@ docker logs -f paddle_ocr ``` ## 4.测试服务 -a. 计算待识别图片的Base64编码(如果只是测试一下效果,可以通过免费的在线工具实现,如:http://tool.chinaz.com/tools/imgtobase/) +a. 计算待识别图片的Base64编码(如果只是测试一下效果,可以通过免费的在线工具实现,如:http://tool.chinaz.com/tools/imgtobase/ b. 
发送服务请求(可参见sample_request.txt中的值) ``` curl -H "Content-Type:application/json" -X POST --data "{\"images\": [\"填入图片Base64编码(需要删除'data:image/jpg;base64,')\"]}" http://localhost:8868/predict/ocr_system -- Gitee From f022613e298617c72456286cfb41415a928f38c4 Mon Sep 17 00:00:00 2001 From: shiyutang <34859558+shiyutang@users.noreply.github.com> Date: Fri, 7 Jul 2023 15:13:37 +0800 Subject: [PATCH 091/112] Update application.md --- doc/doc_ch/application.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/doc_ch/application.md b/doc/doc_ch/application.md index 5135dfac..9105a878 100644 --- a/doc/doc_ch/application.md +++ b/doc/doc_ch/application.md @@ -30,12 +30,12 @@ PaddleOCR场景应用覆盖通用,制造、金融、交通行业的主要OCR | 类别 | 亮点 | 类别 | 亮点 | | -------------- | ------------------------ | ------------ | --------------------- | | 表单VQA | 多模态通用表单结构化提取 | 通用卡证识别 | 通用结构化提取 | -| 增值税发票 | 尽请期待 | 身份证识别 | 结构化提取、图像阴影 | +| 增值税发票 | 敬请期待 | 身份证识别 | 结构化提取、图像阴影 | | 印章检测与识别 | 端到端弯曲文本识别 | 合同比对 | 密集文本检测、NLP串联 | ## 交通 | 类别 | 亮点 | 类别 | 亮点 | | ----------------- | ------------------------------ | ---------- | -------- | -| 车牌识别 | 多角度图像、轻量模型、端侧部署 | 快递单识别 | 尽请期待 | -| 驾驶证/行驶证识别 | 尽请期待 | | | \ No newline at end of file +| 车牌识别 | 多角度图像、轻量模型、端侧部署 | 快递单识别 | 敬请期待 | +| 驾驶证/行驶证识别 | 敬请期待 | | | -- Gitee From 1213cb2171b59e9b95db53e9d2401e478de76997 Mon Sep 17 00:00:00 2001 From: shiyutang <34859558+shiyutang@users.noreply.github.com> Date: Fri, 7 Jul 2023 15:13:59 +0800 Subject: [PATCH 092/112] [Doc] Fix 404 link. 
(#10318) * Update PP-OCRv3_det_train.md * Update knowledge_distillation.md --- doc/doc_ch/PP-OCRv3_det_train.md | 2 +- doc/doc_ch/knowledge_distillation.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/doc_ch/PP-OCRv3_det_train.md b/doc/doc_ch/PP-OCRv3_det_train.md index b3bbc896..c1bfe994 100644 --- a/doc/doc_ch/PP-OCRv3_det_train.md +++ b/doc/doc_ch/PP-OCRv3_det_train.md @@ -32,7 +32,7 @@ PP-OCRv3检测训练包括两个步骤: ### 2.2 训练教师模型 -教师模型训练的配置文件是[ch_PP-OCRv3_det_dml.yml](https://github.com/PaddlePaddle/PaddleOCR/blob/release%2F2.5/configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_dml.yml)。教师模型模型结构的Backbone、Neck、Head分别为Resnet50, LKPAN, DBHead,采用DML的蒸馏方法训练。有关配置文件的详细介绍参考[文档](./knowledge_distillation)。 +教师模型训练的配置文件是[ch_PP-OCRv3_det_dml.yml](https://github.com/PaddlePaddle/PaddleOCR/blob/release%2F2.5/configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_dml.yml)。教师模型模型结构的Backbone、Neck、Head分别为Resnet50, LKPAN, DBHead,采用DML的蒸馏方法训练。有关配置文件的详细介绍参考[文档](./knowledge_distillation.md)。 下载ImageNet预训练模型: diff --git a/doc/doc_ch/knowledge_distillation.md b/doc/doc_ch/knowledge_distillation.md index 79c4418d..f9cbcbfa 100644 --- a/doc/doc_ch/knowledge_distillation.md +++ b/doc/doc_ch/knowledge_distillation.md @@ -69,7 +69,7 @@ PaddleOCR中集成了知识蒸馏的算法,具体地,有以下几个主要 ```yaml Architecture: - model_type: &model_type "rec" # 模型类别,rec、det等,每个子网络的模型类别都与 + model_type: &model_type "rec" # 模型类别,rec、det等,每个子网络的模型类别 name: DistillationModel # 结构名称,蒸馏任务中,为DistillationModel,用于构建对应的结构 algorithm: Distillation # 算法名称 Models: # 模型,包含子网络的配置信息 -- Gitee From d1f3ca06911154848167ef8aaedc27e508044117 Mon Sep 17 00:00:00 2001 From: shiyutang <34859558+shiyutang@users.noreply.github.com> Date: Fri, 7 Jul 2023 15:45:56 +0800 Subject: [PATCH 093/112] Update config.md --- doc/doc_ch/config.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/doc_ch/config.md b/doc/doc_ch/config.md index 41ba8c1f..3430105f 100644 --- a/doc/doc_ch/config.md +++ b/doc/doc_ch/config.md @@ -223,4 +223,4 @@ 
PaddleOCR目前已支持80种(除中文外)语种识别,`configs/rec/multi | rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 斯拉夫字母 | | rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 梵文字母 | -更多支持语种请参考: [多语言模型](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_ch/multi_languages.md#%E8%AF%AD%E7%A7%8D%E7%BC%A9%E5%86%99) +更多支持语种请参考: [多语言模型](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_ch/multi_languages.md) -- Gitee From f8cbbce65cc0927c0a175c39c745d4aeedb50080 Mon Sep 17 00:00:00 2001 From: itasli Date: Fri, 7 Jul 2023 08:13:32 +0000 Subject: [PATCH 094/112] Fix fitz camelCase deprecation and .PDF not being recognized as pdf file (#10181) * Fix fitz camelCase deprecation and .PDF not being recognized as pdf file * refactor get_image_file_list function --- ppocr/utils/utility.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/ppocr/utils/utility.py b/ppocr/utils/utility.py index 18357c8e..ebff2fe7 100755 --- a/ppocr/utils/utility.py +++ b/ppocr/utils/utility.py @@ -59,7 +59,6 @@ def get_image_file_list(img_file): if img_file is None or not os.path.exists(img_file): raise Exception("not found any img file in {}".format(img_file)) - img_end = {'jpg', 'bmp', 'png', 'jpeg', 'rgb', 'tif', 'tiff', 'gif', 'pdf'} if os.path.isfile(img_file) and _check_image_file(img_file): imgs_lists.append(img_file) elif os.path.isdir(img_file): @@ -74,7 +73,7 @@ def get_image_file_list(img_file): def check_and_read(img_path): - if os.path.basename(img_path)[-3:] in ['gif', 'GIF']: + if os.path.basename(img_path)[-3:].lower() == 'gif': gif = cv2.VideoCapture(img_path) ret, frame = gif.read() if not ret: @@ -85,19 +84,19 @@ def check_and_read(img_path): frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB) imgvalue = frame[:, :, ::-1] return imgvalue, True, False - elif os.path.basename(img_path)[-3:] in ['pdf']: + elif os.path.basename(img_path)[-3:].lower() == 'pdf': import fitz from 
PIL import Image imgs = [] with fitz.open(img_path) as pdf: - for pg in range(0, pdf.pageCount): + for pg in range(0, pdf.page_count): page = pdf[pg] mat = fitz.Matrix(2, 2) - pm = page.getPixmap(matrix=mat, alpha=False) + pm = page.get_pixmap(matrix=mat, alpha=False) # if width or height > 2000 pixels, don't enlarge the image if pm.width > 2000 or pm.height > 2000: - pm = page.getPixmap(matrix=fitz.Matrix(1, 1), alpha=False) + pm = page.get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False) img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples) img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR) -- Gitee From 61b3fee29183d38b4ef24347472f4b468e27fc97 Mon Sep 17 00:00:00 2001 From: shiyutang <34859558+shiyutang@users.noreply.github.com> Date: Fri, 7 Jul 2023 16:49:01 +0800 Subject: [PATCH 095/112] Update customize.md (#10325) --- doc/doc_ch/customize.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/doc_ch/customize.md b/doc/doc_ch/customize.md index 5944bf08..3da61ab4 100644 --- a/doc/doc_ch/customize.md +++ b/doc/doc_ch/customize.md @@ -27,4 +27,4 @@ PaddleOCR提供了检测和识别模型的串联工具,可以将训练好的 ``` python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/det/" --rec_model_dir="./inference/rec/" ``` -更多的文本检测、识别串联推理使用方式请参考文档教程中的[基于预测引擎推理](./inference.md)。 +更多的文本检测、识别串联推理使用方式请参考文档教程中的[基于预测引擎推理](./algorithm_inference.md)。 -- Gitee From 5128ceaf7448cb70a04734c3b526bc529b47c658 Mon Sep 17 00:00:00 2001 From: shiyutang <34859558+shiyutang@users.noreply.github.com> Date: Tue, 11 Jul 2023 14:35:55 +0800 Subject: [PATCH 096/112] Update FAQ.md (#10345) --- doc/doc_ch/FAQ.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/doc_ch/FAQ.md b/doc/doc_ch/FAQ.md index a4437b8b..3090cfa2 100644 --- a/doc/doc_ch/FAQ.md +++ b/doc/doc_ch/FAQ.md @@ -188,7 +188,7 @@ A:可以看下训练的尺度和预测的尺度是否相同,如果训练的 #### Q: 如何识别招牌或者广告图中的艺术字? 
-**A**: 招牌或者广告图中的艺术字是文本识别一个非常有挑战性的难题,因为艺术字中的单字和印刷体相比,变化非常大。如果需要识别的艺术字是在一个词典列表内,可以将改每个词典认为是一个待识别图像模板,通过通用图像检索识别系统解决识别问题。可以尝试使用PaddleClas的图像识别系统。 +**A**: 招牌或者广告图中的艺术字是文本识别一个非常有挑战性的难题,因为艺术字中的单字和印刷体相比,变化非常大。如果需要识别的艺术字是在一个词典列表内,可以将该每个词典认为是一个待识别图像模板,通过通用图像检索识别系统解决识别问题。可以尝试使用PaddleClas的图像识别系统PP-shituV2。 #### Q: 图像正常识别出来的文字是OK的,旋转90度后识别出来的结果就比较差,有什么方法可以优化? -- Gitee From fac03876f39bc67acc8eef6d7facb9a2206eeecd Mon Sep 17 00:00:00 2001 From: shiyutang <34859558+shiyutang@users.noreply.github.com> Date: Tue, 11 Jul 2023 14:36:10 +0800 Subject: [PATCH 097/112] Update FAQ.md (#10349) --- doc/doc_ch/FAQ.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/doc_ch/FAQ.md b/doc/doc_ch/FAQ.md index 3090cfa2..531d6491 100644 --- a/doc/doc_ch/FAQ.md +++ b/doc/doc_ch/FAQ.md @@ -400,7 +400,7 @@ StyleText的用途主要是:提取style_image中的字体、背景等style信 A:无论是文字检测,还是文字识别,骨干网络的选择是预测效果和预测效率的权衡。一般,选择更大规模的骨干网络,例如ResNet101_vd,则检测或识别更准确,但预测耗时相应也会增加。而选择更小规模的骨干网络,例如MobileNetV3_small_x0_35,则预测更快,但检测或识别的准确率会大打折扣。幸运的是不同骨干网络的检测或识别效果与在ImageNet数据集图像1000分类任务效果正相关。飞桨图像分类套件PaddleClas汇总了ResNet_vd、Res2Net、HRNet、MobileNetV3、GhostNet等23种系列的分类网络结构,在上述图像分类任务的top1识别准确率,GPU(V100和T4)和CPU(骁龙855)的预测耗时以及相应的117个预训练模型下载地址。 -(1)文字检测骨干网络的替换,主要是确定类似与ResNet的4个stages,以方便集成后续的类似FPN的检测头。此外,对于文字检测问题,使用ImageNet训练的分类预训练模型,可以加速收敛和效果提升。 +(1)文字检测骨干网络的替换,主要是确定类似于ResNet的4个stages,以方便集成后续的类似FPN的检测头。此外,对于文字检测问题,使用ImageNet训练的分类预训练模型,可以加速收敛和效果提升。 (2)文字识别的骨干网络的替换,需要注意网络宽高stride的下降位置。由于文本识别一般宽高比例很大,因此高度下降频率少一些,宽度下降频率多一些。可以参考PaddleOCR中MobileNetV3骨干网络的改动。 -- Gitee From 1dad0a980fda26ecc1479f0ca53d37bde520bec5 Mon Sep 17 00:00:00 2001 From: UserUnknownFactor <63057995+UserUnknownFactor@users.noreply.github.com> Date: Wed, 19 Jul 2023 10:46:29 +0300 Subject: [PATCH 098/112] Don't break overall processing on a bad image (#10216) --- paddleocr.py | 18 ++++++++++++------ tools/infer/predict_system.py | 22 ++++++++++++++++------ 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/paddleocr.py b/paddleocr.py index 
9b7f4e06..61e9482b 100644 --- a/paddleocr.py +++ b/paddleocr.py @@ -512,12 +512,12 @@ class PaddleOCR(predict_system.TextSystem): def ocr(self, img, det=True, rec=True, cls=True): """ - ocr with paddleocr + OCR with PaddleOCR args: - img: img for ocr, support ndarray, img_path and list or ndarray - det: use text detection or not. If false, only rec will be exec. Default is True - rec: use text recognition or not. If false, only det will be exec. Default is True - cls: use angle classifier or not. Default is True. If true, the text with rotation of 180 degrees can be recognized. If no text is rotated by 180 degrees, use cls=False to get better performance. Text with rotation of 90 or 270 degrees can be recognized even if cls=False. + img: img for OCR, support ndarray, img_path and list or ndarray + det: use text detection or not. If False, only rec will be exec. Default is True + rec: use text recognition or not. If False, only det will be exec. Default is True + cls: use angle classifier or not. Default is True. If True, the text with rotation of 180 degrees can be recognized. If no text is rotated by 180 degrees, use cls=False to get better performance. Text with rotation of 90 or 270 degrees can be recognized even if cls=False. 
""" assert isinstance(img, (np.ndarray, list, str, bytes)) if isinstance(img, list) and det == True: @@ -525,7 +525,7 @@ class PaddleOCR(predict_system.TextSystem): exit(0) if cls == True and self.use_angle_cls == False: logger.warning( - 'Since the angle classifier is not initialized, the angle classifier will not be uesd during the forward process' + 'Since the angle classifier is not initialized, it will not be used during the forward process' ) img = check_img(img) @@ -541,6 +541,9 @@ class PaddleOCR(predict_system.TextSystem): ocr_res = [] for idx, img in enumerate(imgs): dt_boxes, rec_res, _ = self.__call__(img, cls) + if not dt_boxes and not rec_res: + ocr_res.append(None) + continue tmp_res = [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)] ocr_res.append(tmp_res) @@ -549,6 +552,9 @@ class PaddleOCR(predict_system.TextSystem): ocr_res = [] for idx, img in enumerate(imgs): dt_boxes, elapse = self.text_detector(img) + if not dt_boxes: + ocr_res.append(None) + continue tmp_res = [box.tolist() for box in dt_boxes] ocr_res.append(tmp_res) return ocr_res diff --git a/tools/infer/predict_system.py b/tools/infer/predict_system.py index 1f9e2e1d..95d87be6 100755 --- a/tools/infer/predict_system.py +++ b/tools/infer/predict_system.py @@ -65,15 +65,25 @@ class TextSystem(object): self.crop_image_res_index += bbox_num def __call__(self, img, cls=True): - time_dict = {'det': 0, 'rec': 0, 'csl': 0, 'all': 0} + time_dict = {'det': 0, 'rec': 0, 'cls': 0, 'all': 0} + + if img is None: + logger.debug("no valid image provided") + return None, None, time_dict + start = time.time() ori_im = img.copy() dt_boxes, elapse = self.text_detector(img) time_dict['det'] = elapse - logger.debug("dt_boxes num : {}, elapse : {}".format( - len(dt_boxes), elapse)) + if dt_boxes is None: - return None, None + logger.debug("no dt_boxes found, elapsed : {}".format(elapse)) + end = time.time() + time_dict['all'] = end - start + return None, None, time_dict + else: + 
logger.debug("dt_boxes num : {}, elapsed : {}".format( + len(dt_boxes), elapse)) img_crop_list = [] dt_boxes = sorted_boxes(dt_boxes) @@ -89,12 +99,12 @@ class TextSystem(object): img_crop_list, angle_list, elapse = self.text_classifier( img_crop_list) time_dict['cls'] = elapse - logger.debug("cls num : {}, elapse : {}".format( + logger.debug("cls num : {}, elapsed : {}".format( len(img_crop_list), elapse)) rec_res, elapse = self.text_recognizer(img_crop_list) time_dict['rec'] = elapse - logger.debug("rec_res num : {}, elapse : {}".format( + logger.debug("rec_res num : {}, elapsed : {}".format( len(rec_res), elapse)) if self.args.save_crop_res: self.draw_crop_rec_res(self.args.crop_res_save_dir, img_crop_list, -- Gitee From 8967e633929cd38e03f72fa1658f49adc4330178 Mon Sep 17 00:00:00 2001 From: UserUnknownFactor <63057995+UserUnknownFactor@users.noreply.github.com> Date: Thu, 20 Jul 2023 15:24:42 +0300 Subject: [PATCH 099/112] Add preprocessing common to OCR tasks (#10217) Add preprocessing to options --- paddleocr.py | 36 ++++++++++++++++++++++++++++-------- ppocr/utils/utility.py | 19 +++++++++++++++++++ ppstructure/utility.py | 17 ++++++++++++++++- tools/infer/utility.py | 4 +++- 4 files changed, 66 insertions(+), 10 deletions(-) diff --git a/paddleocr.py b/paddleocr.py index 61e9482b..409b9334 100644 --- a/paddleocr.py +++ b/paddleocr.py @@ -43,7 +43,7 @@ from tools.infer import predict_system from ppocr.utils.logging import get_logger logger = get_logger() -from ppocr.utils.utility import check_and_read, get_image_file_list +from ppocr.utils.utility import check_and_read, get_image_file_list, alpha_to_color, binarize_img from ppocr.utils.network import maybe_download, download_with_progressbar, is_link, confirm_model_dir_url from tools.infer.utility import draw_ocr, str2bool, check_gpu from ppstructure.utility import init_args, draw_structure_result @@ -424,7 +424,7 @@ def get_model_config(type, version, model_type, lang): def img_decode(content: bytes): 
np_arr = np.frombuffer(content, dtype=np.uint8) - return cv2.imdecode(np_arr, cv2.IMREAD_COLOR) + return cv2.imdecode(np_arr, cv2.IMREAD_UNCHANGED) def check_img(img): @@ -510,7 +510,7 @@ class PaddleOCR(predict_system.TextSystem): super().__init__(params) self.page_num = params.page_num - def ocr(self, img, det=True, rec=True, cls=True): + def ocr(self, img, det=True, rec=True, cls=True, bin=False, inv=False, alpha_color=(255, 255, 255)): """ OCR with PaddleOCR args: @@ -518,6 +518,9 @@ class PaddleOCR(predict_system.TextSystem): det: use text detection or not. If False, only rec will be exec. Default is True rec: use text recognition or not. If False, only det will be exec. Default is True cls: use angle classifier or not. Default is True. If True, the text with rotation of 180 degrees can be recognized. If no text is rotated by 180 degrees, use cls=False to get better performance. Text with rotation of 90 or 270 degrees can be recognized even if cls=False. + bin: binarize image to black and white. Default is False. + inv: invert image colors. Default is False. + alpha_color: set RGB color Tuple for transparent parts replacement. Default is pure white. 
""" assert isinstance(img, (np.ndarray, list, str, bytes)) if isinstance(img, list) and det == True: @@ -532,14 +535,24 @@ class PaddleOCR(predict_system.TextSystem): # for infer pdf file if isinstance(img, list): if self.page_num > len(img) or self.page_num == 0: - imgs=img + imgs = img else: imgs = img[:self.page_num] else: imgs = [img] + + def preprocess_image(_image): + _image = alpha_to_color(_image, alpha_color) + if inv: + _image = cv2.bitwise_not(_image) + if bin: + _image = binarize_img(_image) + return _image + if det and rec: ocr_res = [] for idx, img in enumerate(imgs): + img = preprocess_image(img) dt_boxes, rec_res, _ = self.__call__(img, cls) if not dt_boxes and not rec_res: ocr_res.append(None) @@ -551,6 +564,7 @@ class PaddleOCR(predict_system.TextSystem): elif det and not rec: ocr_res = [] for idx, img in enumerate(imgs): + img = preprocess_image(img) dt_boxes, elapse = self.text_detector(img) if not dt_boxes: ocr_res.append(None) @@ -563,6 +577,7 @@ class PaddleOCR(predict_system.TextSystem): cls_res = [] for idx, img in enumerate(imgs): if not isinstance(img, list): + img = preprocess_image(img) img = [img] if self.use_angle_cls and cls: img, cls_res_tmp, elapse = self.text_classifier(img) @@ -664,10 +679,15 @@ def main(): img_name = os.path.basename(img_path).split('.')[0] logger.info('{}{}{}'.format('*' * 10, img_path, '*' * 10)) if args.type == 'ocr': - result = engine.ocr(img_path, - det=args.det, - rec=args.rec, - cls=args.use_angle_cls) + result = engine.ocr( + img_path, + det=args.det, + rec=args.rec, + cls=args.use_angle_cls, + bin=args.binarize, + inv=args.invert, + alpha_color=args.alphacolor + ) if result is not None: for idx in range(len(result)): res = result[idx] diff --git a/ppocr/utils/utility.py b/ppocr/utils/utility.py index ebff2fe7..b20ac676 100755 --- a/ppocr/utils/utility.py +++ b/ppocr/utils/utility.py @@ -71,6 +71,25 @@ def get_image_file_list(img_file): imgs_lists = sorted(imgs_lists) return imgs_lists +def 
binarize_img(img): + if len(img.shape) == 3 and img.shape[2] == 3: + gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # conversion to grayscale image + # use cv2 threshold binarization + _, gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) + img = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR) + return img + +def alpha_to_color(img, alpha_color=(255, 255, 255)): + if len(img.shape) == 3 and img.shape[2] == 4: + B, G, R, A = cv2.split(img) + alpha = A / 255 + + R = (alpha_color[0] * (1 - alpha) + R * alpha).astype(np.uint8) + G = (alpha_color[1] * (1 - alpha) + G * alpha).astype(np.uint8) + B = (alpha_color[2] * (1 - alpha) + B * alpha).astype(np.uint8) + + img = cv2.merge((B, G, R)) + return img def check_and_read(img_path): if os.path.basename(img_path)[-3:].lower() == 'gif': diff --git a/ppstructure/utility.py b/ppstructure/utility.py index d909f1a8..182283a7 100644 --- a/ppstructure/utility.py +++ b/ppstructure/utility.py @@ -15,7 +15,7 @@ import random import ast from PIL import Image, ImageDraw, ImageFont import numpy as np -from tools.infer.utility import draw_ocr_box_txt, str2bool, init_args as infer_args +from tools.infer.utility import draw_ocr_box_txt, str2bool, str2int_tuple, init_args as infer_args def init_args(): @@ -98,6 +98,21 @@ def init_args(): type=str2bool, default=False, help='Whether to use pdf2docx api') + parser.add_argument( + "--invert", + type=str2bool, + default=False, + help='Whether to invert image before processing') + parser.add_argument( + "--binarize", + type=str2bool, + default=False, + help='Whether to threshold binarize image before processing') + parser.add_argument( + "--alphacolor", + type=str2int_tuple, + default=(255, 255, 255), + help='Replacement color for the alpha channel, if the latter is present; R,G,B integers') return parser diff --git a/tools/infer/utility.py b/tools/infer/utility.py index 959373cd..8ada2b3b 100644 --- a/tools/infer/utility.py +++ b/tools/infer/utility.py @@ -28,8 +28,10 @@ from 
ppocr.utils.logging import get_logger def str2bool(v): - return v.lower() in ("true", "t", "1") + return v.lower() in ("true", "yes", "t", "y", "1") +def str2int_tuple(v): + return tuple([int(i.strip()) for i in v.split(",")]) def init_args(): parser = argparse.ArgumentParser() -- Gitee From 43753204b643ee50f03d4dac4aa9302ec7a62ce9 Mon Sep 17 00:00:00 2001 From: PeiyuLau <135964669+PeiyuLau@users.noreply.github.com> Date: Fri, 21 Jul 2023 10:22:05 +0800 Subject: [PATCH 100/112] [MLU] add mlu device for infer (#10249) --- tools/infer/utility.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/infer/utility.py b/tools/infer/utility.py index 8ada2b3b..b6a77063 100644 --- a/tools/infer/utility.py +++ b/tools/infer/utility.py @@ -30,15 +30,18 @@ from ppocr.utils.logging import get_logger def str2bool(v): return v.lower() in ("true", "yes", "t", "y", "1") + def str2int_tuple(v): return tuple([int(i.strip()) for i in v.split(",")]) + def init_args(): parser = argparse.ArgumentParser() # params for prediction engine parser.add_argument("--use_gpu", type=str2bool, default=True) parser.add_argument("--use_xpu", type=str2bool, default=False) parser.add_argument("--use_npu", type=str2bool, default=False) + parser.add_argument("--use_mlu", type=str2bool, default=False) parser.add_argument("--ir_optim", type=str2bool, default=True) parser.add_argument("--use_tensorrt", type=str2bool, default=False) parser.add_argument("--min_subgraph_size", type=int, default=15) @@ -249,6 +252,8 @@ def create_predictor(args, mode, logger): elif args.use_npu: config.enable_custom_device("npu") + elif args.use_mlu: + config.enable_custom_device("mlu") elif args.use_xpu: config.enable_xpu(10 * 1024 * 1024) else: -- Gitee From d65842fe5a55f15d77e2b5961270d48601521030 Mon Sep 17 00:00:00 2001 From: shiyutang <34859558+shiyutang@users.noreply.github.com> Date: Fri, 21 Jul 2023 10:39:32 +0800 Subject: [PATCH 101/112] Create newfeature.md --- .github/ISSUE_TEMPLATE/newfeature.md | 17 
+++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/newfeature.md diff --git a/.github/ISSUE_TEMPLATE/newfeature.md b/.github/ISSUE_TEMPLATE/newfeature.md new file mode 100644 index 00000000..95e1488b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/newfeature.md @@ -0,0 +1,17 @@ +--- +name: Issue template +about: Issue template for new features. +title: '' +labels: '' +assignees: '' + +--- + +## 背景 + +经过需求征集https://github.com/PaddlePaddle/PaddleOCR/issues/10334 和每周技术研讨会 https://github.com/PaddlePaddle/PaddleOCR/issues/10223 讨论,我们确定了XXXX任务。 + +## 解决步骤 +1. 根据开源代码进行网络结构、评估指标转换。代码链接:XXXX +2. 结合[论文复现指南](https://github.com/PaddlePaddle/models/blob/release%2F2.2/tutorials/article-implementation/ArticleReproduction_CV.md),进行前反向对齐等操作,达到论文Table.1中的指标。 +3. 参考[PR提交规范](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6/doc/doc_ch/code_and_doc.md)提交代码PR到ppocr中。 -- Gitee From b13f99607653c220ba94df2a8650edac086b0f37 Mon Sep 17 00:00:00 2001 From: shiyutang <34859558+shiyutang@users.noreply.github.com> Date: Fri, 21 Jul 2023 10:41:51 +0800 Subject: [PATCH 102/112] Update newfeature.md --- .github/ISSUE_TEMPLATE/newfeature.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/newfeature.md b/.github/ISSUE_TEMPLATE/newfeature.md index 95e1488b..4ffcbbb5 100644 --- a/.github/ISSUE_TEMPLATE/newfeature.md +++ b/.github/ISSUE_TEMPLATE/newfeature.md @@ -1,9 +1,9 @@ --- -name: Issue template +name: New Feature Issue template about: Issue template for new features. 
title: '' -labels: '' -assignees: '' +labels: 'Code PR is needed' +assignees: 'shiyutang' --- -- Gitee From d14ad025412d2b185d32d76de958a20c2e248827 Mon Sep 17 00:00:00 2001 From: duanyanhui <45005871+YanhuiDua@users.noreply.github.com> Date: Mon, 24 Jul 2023 19:56:55 +0800 Subject: [PATCH 103/112] update npu python version of tipc (#10461) --- test_tipc/test_train_inference_python_npu.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test_tipc/test_train_inference_python_npu.sh b/test_tipc/test_train_inference_python_npu.sh index ebeaefbc..4341ceea 100644 --- a/test_tipc/test_train_inference_python_npu.sh +++ b/test_tipc/test_train_inference_python_npu.sh @@ -29,6 +29,8 @@ fi sed -i 's/use_gpu/use_npu/g' $FILENAME # disable benchmark as AutoLog required nvidia-smi command sed -i 's/--benchmark:True/--benchmark:False/g' $FILENAME +# python has been updated to version 3.9 for npu backend +sed -i "s/python3.7/python3.9/g" $FILENAME dataline=`cat $FILENAME` # parser params -- Gitee From 2b7b9dc2cfa696bc2d2c3867556247faa7b1b4c4 Mon Sep 17 00:00:00 2001 From: kerneltravel Date: Wed, 2 Aug 2023 10:10:36 +0800 Subject: [PATCH 104/112] remove unused imported module, so can avoid PyInstaller packaged binary's start-time not found module error. 
(#10502) --- ppocr/data/imaug/ct_process.py | 1 - ppocr/utils/utility.py | 1 - 2 files changed, 2 deletions(-) diff --git a/ppocr/data/imaug/ct_process.py b/ppocr/data/imaug/ct_process.py index 59715090..67d89c28 100644 --- a/ppocr/data/imaug/ct_process.py +++ b/ppocr/data/imaug/ct_process.py @@ -20,7 +20,6 @@ import paddle import numpy as np import Polygon as plg -import scipy.io as scio from PIL import Image import paddle.vision.transforms as transforms diff --git a/ppocr/utils/utility.py b/ppocr/utils/utility.py index b20ac676..18cc2859 100755 --- a/ppocr/utils/utility.py +++ b/ppocr/utils/utility.py @@ -14,7 +14,6 @@ import logging import os -import imghdr import cv2 import random import numpy as np -- Gitee From 1e11f254094305c593d4c734a5c4148f945accaa Mon Sep 17 00:00:00 2001 From: ToddBear <43341135+ToddBear@users.noreply.github.com> Date: Wed, 2 Aug 2023 19:11:28 +0800 Subject: [PATCH 105/112] =?UTF-8?q?CV=E5=A5=97=E4=BB=B6=E5=BB=BA=E8=AE=BE?= =?UTF-8?q?=E4=B8=93=E9=A1=B9=E6=B4=BB=E5=8A=A8=20-=20=E6=96=87=E5=AD=97?= =?UTF-8?q?=E8=AF=86=E5=88=AB=E8=BF=94=E5=9B=9E=E5=8D=95=E5=AD=97=E8=AF=86?= =?UTF-8?q?=E5=88=AB=E5=9D=90=E6=A0=87=20(#10515)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * modification of return word box * update_implements * Update rec_postprocess.py * Update utility.py --- ppocr/postprocess/rec_postprocess.py | 78 ++++++++++++++++++++++++++-- ppstructure/predict_system.py | 26 +++++++--- ppstructure/utility.py | 59 ++++++++++++++++++++- tools/infer/predict_rec.py | 10 +++- tools/infer/predict_system.py | 2 +- tools/infer/utility.py | 4 ++ 6 files changed, 163 insertions(+), 16 deletions(-) diff --git a/ppocr/postprocess/rec_postprocess.py b/ppocr/postprocess/rec_postprocess.py index fbf8b93e..230f84d1 100644 --- a/ppocr/postprocess/rec_postprocess.py +++ b/ppocr/postprocess/rec_postprocess.py @@ -67,7 +67,66 @@ class BaseRecLabelDecode(object): def add_special_char(self, dict_character): return 
dict_character - def decode(self, text_index, text_prob=None, is_remove_duplicate=False): + def get_word_info(self, text, selection): + """ + Group the decoded characters and record the corresponding decoded positions. + + Args: + text: the decoded text + selection: the bool array that identifies which columns of features are decoded as non-separated characters + Returns: + word_list: list of the grouped words + word_col_list: list of decoding positions corresponding to each character in the grouped word + state_list: list of marker to identify the type of grouping words, including two types of grouping words: + - 'cn': continuous Chinese characters (e.g., 你好啊) + - 'en&num': continuous English characters (e.g., hello), number (e.g., 123, 1.123), or mixed of them connected by '-' (e.g., VGG-16) + The remaining characters in text are treated as separators between groups (e.g., space, '(', ')', etc.). + """ + state = None + word_content = [] + word_col_content = [] + word_list = [] + word_col_list = [] + state_list = [] + valid_col = np.where(selection==True)[0] + + for c_i, char in enumerate(text): + if '\u4e00' <= char <= '\u9fff': + c_state = 'cn' + elif bool(re.search('[a-zA-Z0-9]', char)): + c_state = 'en&num' + else: + c_state = 'splitter' + + if char == '.' 
and state == 'en&num' and c_i + 1 < len(text) and bool(re.search('[0-9]', text[c_i+1])): # grouping floating-point number + c_state = 'en&num' + if char == '-' and state == "en&num": # grouping word with '-', such as 'state-of-the-art' + c_state = 'en&num' + + if state == None: + state = c_state + + if state != c_state: + if len(word_content) != 0: + word_list.append(word_content) + word_col_list.append(word_col_content) + state_list.append(state) + word_content = [] + word_col_content = [] + state = c_state + + if state != "splitter": + word_content.append(char) + word_col_content.append(valid_col[c_i]) + + if len(word_content) != 0: + word_list.append(word_content) + word_col_list.append(word_col_content) + state_list.append(state) + + return word_list, word_col_list, state_list + + def decode(self, text_index, text_prob=None, is_remove_duplicate=False, return_word_box=False): """ convert text-index into text-label. """ result_list = [] ignored_tokens = self.get_ignored_tokens() @@ -95,8 +154,12 @@ class BaseRecLabelDecode(object): if self.reverse: # for arabic rec text = self.pred_reverse(text) - - result_list.append((text, np.mean(conf_list).tolist())) + + if return_word_box: + word_list, word_col_list, state_list = self.get_word_info(text, selection) + result_list.append((text, np.mean(conf_list).tolist(), [len(text_index[batch_idx]), word_list, word_col_list, state_list])) + else: + result_list.append((text, np.mean(conf_list).tolist())) return result_list def get_ignored_tokens(self): @@ -111,14 +174,19 @@ class CTCLabelDecode(BaseRecLabelDecode): super(CTCLabelDecode, self).__init__(character_dict_path, use_space_char) - def __call__(self, preds, label=None, *args, **kwargs): + def __call__(self, preds, label=None, return_word_box=False, *args, **kwargs): if isinstance(preds, tuple) or isinstance(preds, list): preds = preds[-1] if isinstance(preds, paddle.Tensor): preds = preds.numpy() preds_idx = preds.argmax(axis=2) preds_prob = preds.max(axis=2) - text = 
self.decode(preds_idx, preds_prob, is_remove_duplicate=True) + text = self.decode(preds_idx, preds_prob, is_remove_duplicate=True, return_word_box=return_word_box) + if return_word_box: + for rec_idx, rec in enumerate(text): + wh_ratio = kwargs['wh_ratio_list'][rec_idx] + max_wh_ratio = kwargs['max_wh_ratio'] + rec[2][0] = rec[2][0]*(wh_ratio/max_wh_ratio) if label is None: return text label = self.decode(label) diff --git a/ppstructure/predict_system.py b/ppstructure/predict_system.py index b32b7062..b8b87168 100644 --- a/ppstructure/predict_system.py +++ b/ppstructure/predict_system.py @@ -34,7 +34,7 @@ from ppocr.utils.visual import draw_ser_results, draw_re_results from tools.infer.predict_system import TextSystem from ppstructure.layout.predict_layout import LayoutPredictor from ppstructure.table.predict_table import TableSystem, to_excel -from ppstructure.utility import parse_args, draw_structure_result +from ppstructure.utility import parse_args, draw_structure_result, cal_ocr_word_box logger = get_logger() @@ -79,6 +79,8 @@ class StructureSystem(object): from ppstructure.kie.predict_kie_token_ser_re import SerRePredictor self.kie_predictor = SerRePredictor(args) + self.return_word_box = args.return_word_box + def __call__(self, img, return_ocr_result_in_table=False, img_idx=0): time_dict = { 'image_orientation': 0, @@ -156,17 +158,27 @@ class StructureSystem(object): ] res = [] for box, rec_res in zip(filter_boxes, filter_rec_res): - rec_str, rec_conf = rec_res + rec_str, rec_conf = rec_res[0], rec_res[1] for token in style_token: if token in rec_str: rec_str = rec_str.replace(token, '') if not self.recovery: box += [x1, y1] - res.append({ - 'text': rec_str, - 'confidence': float(rec_conf), - 'text_region': box.tolist() - }) + if self.return_word_box: + word_box_content_list, word_box_list = cal_ocr_word_box(rec_str, box, rec_res[2]) + res.append({ + 'text': rec_str, + 'confidence': float(rec_conf), + 'text_region': box.tolist(), + 'text_word': 
word_box_content_list, + 'text_word_region': word_box_list + }) + else: + res.append({ + 'text': rec_str, + 'confidence': float(rec_conf), + 'text_region': box.tolist() + }) res_list.append({ 'type': region['label'].lower(), 'bbox': [x1, y1, x2, y2], diff --git a/ppstructure/utility.py b/ppstructure/utility.py index 182283a7..320722d1 100644 --- a/ppstructure/utility.py +++ b/ppstructure/utility.py @@ -16,7 +16,7 @@ import ast from PIL import Image, ImageDraw, ImageFont import numpy as np from tools.infer.utility import draw_ocr_box_txt, str2bool, str2int_tuple, init_args as infer_args - +import math def init_args(): parser = infer_args() @@ -166,6 +166,63 @@ def draw_structure_result(image, result, font_path): txts.append(text_result['text']) scores.append(text_result['confidence']) + if 'text_word_region' in text_result: + for word_region in text_result['text_word_region']: + char_box = word_region + box_height = int( + math.sqrt((char_box[0][0] - char_box[3][0])**2 + (char_box[0][1] - char_box[3][1])**2)) + box_width = int( + math.sqrt((char_box[0][0] - char_box[1][0])**2 + (char_box[0][1] - char_box[1][1])**2)) + if box_height == 0 or box_width == 0: + continue + boxes.append(word_region) + txts.append("") + scores.append(1.0) + im_show = draw_ocr_box_txt( img_layout, boxes, txts, scores, font_path=font_path, drop_score=0) return im_show + +def cal_ocr_word_box(rec_str, box, rec_word_info): + ''' Calculate the detection frame for each word based on the results of recognition and detection of ocr''' + + col_num, word_list, word_col_list, state_list = rec_word_info + box = box.tolist() + bbox_x_start = box[0][0] + bbox_x_end = box[1][0] + bbox_y_start = box[0][1] + bbox_y_end = box[2][1] + + cell_width = (bbox_x_end - bbox_x_start)/col_num + + word_box_list = [] + word_box_content_list = [] + cn_width_list = [] + cn_col_list = [] + for word, word_col, state in zip(word_list, word_col_list, state_list): + if state == 'cn': + if len(word_col) != 1: + 
char_seq_length = (word_col[-1] - word_col[0] + 1) * cell_width + char_width = char_seq_length/(len(word_col)-1) + cn_width_list.append(char_width) + cn_col_list += word_col + word_box_content_list += word + else: + cell_x_start = bbox_x_start + int(word_col[0] * cell_width) + cell_x_end = bbox_x_start + int((word_col[-1]+1) * cell_width) + cell = ((cell_x_start, bbox_y_start), (cell_x_end, bbox_y_start), (cell_x_end, bbox_y_end), (cell_x_start, bbox_y_end)) + word_box_list.append(cell) + word_box_content_list.append("".join(word)) + if len(cn_col_list) != 0: + if len(cn_width_list) != 0: + avg_char_width = np.mean(cn_width_list) + else: + avg_char_width = (bbox_x_end - bbox_x_start)/len(rec_str) + for center_idx in cn_col_list: + center_x = (center_idx+0.5)*cell_width + cell_x_start = max(int(center_x - avg_char_width/2), 0) + bbox_x_start + cell_x_end = min(int(center_x + avg_char_width/2), bbox_x_end-bbox_x_start) + bbox_x_start + cell = ((cell_x_start, bbox_y_start), (cell_x_end, bbox_y_start), (cell_x_end, bbox_y_end), (cell_x_start, bbox_y_end)) + word_box_list.append(cell) + + return word_box_content_list, word_box_list \ No newline at end of file diff --git a/tools/infer/predict_rec.py b/tools/infer/predict_rec.py index b3ef557c..7f4a3863 100755 --- a/tools/infer/predict_rec.py +++ b/tools/infer/predict_rec.py @@ -116,6 +116,7 @@ class TextRecognizer(object): "use_space_char": args.use_space_char } self.postprocess_op = build_post_process(postprocess_params) + self.postprocess_params = postprocess_params self.predictor, self.input_tensor, self.output_tensors, self.config = \ utility.create_predictor(args, 'rec', logger) self.benchmark = args.benchmark @@ -139,6 +140,7 @@ class TextRecognizer(object): ], warmup=0, logger=logger) + self.return_word_box = args.return_word_box def resize_norm_img(self, img, max_wh_ratio): imgC, imgH, imgW = self.rec_image_shape @@ -407,11 +409,12 @@ class TextRecognizer(object): valid_ratios = [] imgC, imgH, imgW = 
self.rec_image_shape[:3] max_wh_ratio = imgW / imgH - # max_wh_ratio = 0 + wh_ratio_list = [] for ino in range(beg_img_no, end_img_no): h, w = img_list[indices[ino]].shape[0:2] wh_ratio = w * 1.0 / h max_wh_ratio = max(max_wh_ratio, wh_ratio) + wh_ratio_list.append(wh_ratio) for ino in range(beg_img_no, end_img_no): if self.rec_algorithm == "SAR": norm_img, _, _, valid_ratio = self.resize_norm_img_sar( @@ -616,7 +619,10 @@ class TextRecognizer(object): preds = outputs else: preds = outputs[0] - rec_result = self.postprocess_op(preds) + if self.postprocess_params['name'] == 'CTCLabelDecode': + rec_result = self.postprocess_op(preds, return_word_box=self.return_word_box, wh_ratio_list=wh_ratio_list, max_wh_ratio=max_wh_ratio) + else: + rec_result = self.postprocess_op(preds) for rno in range(len(rec_result)): rec_res[indices[beg_img_no + rno]] = rec_result[rno] if self.benchmark: diff --git a/tools/infer/predict_system.py b/tools/infer/predict_system.py index 95d87be6..8af45b4c 100755 --- a/tools/infer/predict_system.py +++ b/tools/infer/predict_system.py @@ -111,7 +111,7 @@ class TextSystem(object): rec_res) filter_boxes, filter_rec_res = [], [] for box, rec_result in zip(dt_boxes, rec_res): - text, score = rec_result + text, score = rec_result[0], rec_result[1] if score >= self.drop_score: filter_boxes.append(box) filter_rec_res.append(rec_result) diff --git a/tools/infer/utility.py b/tools/infer/utility.py index b6a77063..4883015b 100644 --- a/tools/infer/utility.py +++ b/tools/infer/utility.py @@ -150,6 +150,10 @@ def init_args(): parser.add_argument("--show_log", type=str2bool, default=True) parser.add_argument("--use_onnx", type=str2bool, default=False) + + # extended function + parser.add_argument("--return_word_box", type=str2bool, default=False, help='Whether return the bbox of each word (split by space) or chinese character. 
Only used in ppstructure for layout recovery') + return parser -- Gitee From f04871ed3e51db2753db161c8aaf102b16d6d9a4 Mon Sep 17 00:00:00 2001 From: Ligoml <39876205+Ligoml@users.noreply.github.com> Date: Sat, 5 Aug 2023 18:46:00 +0800 Subject: [PATCH 106/112] =?UTF-8?q?Update=20README=5Fch.md,=20=E6=9B=B4?= =?UTF-8?q?=E6=96=B0=E3=80=8C=E5=BC=80=E6=BA=90=E5=85=B1=E5=BB=BA=E3=80=8D?= =?UTF-8?q?=E4=B8=AD=E6=96=87=E6=96=87=E6=A1=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README_ch.md | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/README_ch.md b/README_ch.md index c8acb6fe..556ff5d3 100755 --- a/README_ch.md +++ b/README_ch.md @@ -75,19 +75,14 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力 -## 👫 开源社区 -- **📑项目合作:** 如果您是企业开发者且有明确的OCR垂类应用需求,填写[问卷](https://paddle.wjx.cn/vj/QwF7GKw.aspx)后可免费与官方团队展开不同层次的合作。 -- **👫加入社区:** **微信扫描二维码并填写问卷之后,加入交流群领取20G重磅OCR学习大礼包** - - **包括《动手学OCR》电子书** ,配套讲解视频和notebook项目;**PaddleOCR历次发版直播课回放链接**; - - **OCR场景应用模型集合:** 包含数码管、液晶屏、车牌、高精度SVTR模型、手写体识别等垂类模型,覆盖通用,制造、金融、交通行业的主要OCR垂类应用。 - - PDF2Word应用程序;OCR社区优秀开发者项目分享视频。 -- **🏅️社区项目**:[社区项目](./doc/doc_ch/thirdparty.md)文档中包含了社区用户**使用PaddleOCR开发的各种工具、应用**以及**为PaddleOCR贡献的功能、优化的文档与代码**等,是官方为社区开发者打造的荣誉墙,也是帮助优质项目宣传的广播站。 -- **🎁社区常规赛**:社区常规赛是面向OCR开发者的积分赛事,覆盖文档、代码、模型和应用四大类型,以季度为单位评选并发放奖励,赛题详情与报名方法可参考[链接](https://github.com/PaddlePaddle/PaddleOCR/issues/4982)。 - -
- -

PaddleOCR官方交流群二维码

-
+## 🚀 开源共建 + +- **👫 加入社区**:感谢大家长久以来对 PaddleOCR 的支持和关注,与广大开发者共同构建一个专业、和谐、相互帮助的开源社区是 PaddleOCR 的目标。我们非常欢迎各位开发者参与到飞桨社区的开源建设中,加入开源、共建飞桨。**为感谢社区开发者在 PaddleOCR release2.7 中做出的代码贡献,我们将为贡献者制作与邮寄**[**开源贡献证书**](https://github.com/PaddlePaddle/community/blob/master/contributors/certificate-inspection.md)**,烦请**[**填写问卷**](https://paddle.wjx.cn/vm/wFNr6w7.aspx)**提供必要的邮寄信息。** +- 🤩 **社区活动**:飞桨开源社区长期运营与发布各类丰富的活动与开发任务,在 PaddleOCR 社区,你可以关注以下社区活动,并选择自己感兴趣的内容参与开源共建: + - **🎁** **飞桨套件快乐开源常规赛 |** [**传送门**](https://github.com/PaddlePaddle/PaddleOCR/issues/10223)**:**OCR 社区常规赛升级版,以建设更好用的 OCR 套件为目标,包括但不限于学术前沿模型训练与推理、打磨优化 OCR 工具与应用项目开发等,任何有利于社区意见流动和问题解决的行为都热切希望大家的参与。让我们共同成长为飞桨套件的重要 Contributor 🎉🎉🎉。 + - 💡 **新需求征集 |** [**传送门**](https://github.com/PaddlePaddle/PaddleOCR/issues/10334)**:**你在日常研究和实践深度学习过程中,有哪些你期望的 feature 亟待实现?请按照格式描述你想实现的 feature 和你提出的初步实现思路,我们会定期沟通与讨论这些需求,并将其纳入未来的版本规划中。 + - 💬 **PP-SIG 技术研讨****会 |** [**传送门**](https://github.com/PaddlePaddle/community/tree/master/ppsigs)**:**PP-SIG 是飞桨社区开发者由于相同的兴趣汇聚在一起形成的虚拟组织,通过定期召开技术研讨会的方式,分享行业前沿动态、探讨社区需求与技术开发细节、发起社区联合贡献任务。PaddleOCR 希望可以通过 AI 的力量助力任何一位有梦想的开发者实现自己的想法,享受创造价值带来的愉悦。 +- **📑 项目合作:**如果你有企业中明确的 OCR 垂类应用需求,我们推荐你使用训压推一站式全流 -- Gitee From 0105dfc90d8a3a91e0d703828e5c90775b4a1972 Mon Sep 17 00:00:00 2001 From: Ligoml <39876205+Ligoml@users.noreply.github.com> Date: Sat, 5 Aug 2023 18:56:55 +0800 Subject: [PATCH 107/112] Update README_ch.md --- README_ch.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README_ch.md b/README_ch.md index 556ff5d3..da5a87bf 100755 --- a/README_ch.md +++ b/README_ch.md @@ -77,12 +77,12 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力 ## 🚀 开源共建 -- **👫 加入社区**:感谢大家长久以来对 PaddleOCR 的支持和关注,与广大开发者共同构建一个专业、和谐、相互帮助的开源社区是 PaddleOCR 的目标。我们非常欢迎各位开发者参与到飞桨社区的开源建设中,加入开源、共建飞桨。**为感谢社区开发者在 PaddleOCR release2.7 中做出的代码贡献,我们将为贡献者制作与邮寄**[**开源贡献证书**](https://github.com/PaddlePaddle/community/blob/master/contributors/certificate-inspection.md)**,烦请**[**填写问卷**](https://paddle.wjx.cn/vm/wFNr6w7.aspx)**提供必要的邮寄信息。** +- 
**👫 加入社区**:感谢大家长久以来对 PaddleOCR 的支持和关注,与广大开发者共同构建一个专业、和谐、相互帮助的开源社区是 PaddleOCR 的目标。我们非常欢迎各位开发者参与到飞桨社区的开源建设中,加入开源、共建飞桨。**为感谢社区开发者在 PaddleOCR release2.7 中做出的代码贡献,我们将为贡献者制作与邮寄[开源贡献证书](https://github.com/PaddlePaddle/community/blob/master/contributors/certificate-inspection.md),烦请[填写问卷](https://paddle.wjx.cn/vm/wFNr6w7.aspx)提供必要的邮寄信息。** - 🤩 **社区活动**:飞桨开源社区长期运营与发布各类丰富的活动与开发任务,在 PaddleOCR 社区,你可以关注以下社区活动,并选择自己感兴趣的内容参与开源共建: - - **🎁** **飞桨套件快乐开源常规赛 |** [**传送门**](https://github.com/PaddlePaddle/PaddleOCR/issues/10223)**:**OCR 社区常规赛升级版,以建设更好用的 OCR 套件为目标,包括但不限于学术前沿模型训练与推理、打磨优化 OCR 工具与应用项目开发等,任何有利于社区意见流动和问题解决的行为都热切希望大家的参与。让我们共同成长为飞桨套件的重要 Contributor 🎉🎉🎉。 - - 💡 **新需求征集 |** [**传送门**](https://github.com/PaddlePaddle/PaddleOCR/issues/10334)**:**你在日常研究和实践深度学习过程中,有哪些你期望的 feature 亟待实现?请按照格式描述你想实现的 feature 和你提出的初步实现思路,我们会定期沟通与讨论这些需求,并将其纳入未来的版本规划中。 - - 💬 **PP-SIG 技术研讨****会 |** [**传送门**](https://github.com/PaddlePaddle/community/tree/master/ppsigs)**:**PP-SIG 是飞桨社区开发者由于相同的兴趣汇聚在一起形成的虚拟组织,通过定期召开技术研讨会的方式,分享行业前沿动态、探讨社区需求与技术开发细节、发起社区联合贡献任务。PaddleOCR 希望可以通过 AI 的力量助力任何一位有梦想的开发者实现自己的想法,享受创造价值带来的愉悦。 -- **📑 项目合作:**如果你有企业中明确的 OCR 垂类应用需求,我们推荐你使用训压推一站式全流 + - 🎁 **飞桨套件快乐开源常规赛 | [传送门](https://github.com/PaddlePaddle/PaddleOCR/issues/10223)**:OCR 社区常规赛升级版,以建设更好用的 OCR 套件为目标,包括但不限于学术前沿模型训练与推理、打磨优化 OCR 工具与应用项目开发等,任何有利于社区意见流动和问题解决的行为都热切希望大家的参与。让我们共同成长为飞桨套件的重要 Contributor 🎉🎉🎉。 + - 💡 **新需求征集 | [传送门](https://github.com/PaddlePaddle/PaddleOCR/issues/10334)**:你在日常研究和实践深度学习过程中,有哪些你期望的 feature 亟待实现?请按照格式描述你想实现的 feature 和你提出的初步实现思路,我们会定期沟通与讨论这些需求,并将其纳入未来的版本规划中。 + - 💬 **PP-SIG 技术研讨会 | [传送门](https://github.com/PaddlePaddle/community/tree/master/ppsigs)**:PP-SIG 是飞桨社区开发者由于相同的兴趣汇聚在一起形成的虚拟组织,通过定期召开技术研讨会的方式,分享行业前沿动态、探讨社区需求与技术开发细节、发起社区联合贡献任务。PaddleOCR 希望可以通过 AI 的力量助力任何一位有梦想的开发者实现自己的想法,享受创造价值带来的愉悦。 +- 📑 **项目合作**:如果你有企业中明确的 OCR 垂类应用需求,我们推荐你使用训压推一站式全流程高效率开发平台 PaddleX,助力 AI 技术快速落地。PaddleX 还支持联创开发,利润分成!欢迎广大的个人开发者和企业开发者参与进来,共创繁荣的 AI 技术生态! 
-- Gitee From df54a7b4223c64a8480380f618fe95de3b8cf73b Mon Sep 17 00:00:00 2001 From: Ligoml <39876205+Ligoml@users.noreply.github.com> Date: Sat, 5 Aug 2023 19:00:30 +0800 Subject: [PATCH 108/112] revert README_ch.md update --- README_ch.md | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/README_ch.md b/README_ch.md index da5a87bf..909ae893 100755 --- a/README_ch.md +++ b/README_ch.md @@ -75,15 +75,19 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力 -## 🚀 开源共建 - -- **👫 加入社区**:感谢大家长久以来对 PaddleOCR 的支持和关注,与广大开发者共同构建一个专业、和谐、相互帮助的开源社区是 PaddleOCR 的目标。我们非常欢迎各位开发者参与到飞桨社区的开源建设中,加入开源、共建飞桨。**为感谢社区开发者在 PaddleOCR release2.7 中做出的代码贡献,我们将为贡献者制作与邮寄[开源贡献证书](https://github.com/PaddlePaddle/community/blob/master/contributors/certificate-inspection.md),烦请[填写问卷](https://paddle.wjx.cn/vm/wFNr6w7.aspx)提供必要的邮寄信息。** -- 🤩 **社区活动**:飞桨开源社区长期运营与发布各类丰富的活动与开发任务,在 PaddleOCR 社区,你可以关注以下社区活动,并选择自己感兴趣的内容参与开源共建: - - 🎁 **飞桨套件快乐开源常规赛 | [传送门](https://github.com/PaddlePaddle/PaddleOCR/issues/10223)**:OCR 社区常规赛升级版,以建设更好用的 OCR 套件为目标,包括但不限于学术前沿模型训练与推理、打磨优化 OCR 工具与应用项目开发等,任何有利于社区意见流动和问题解决的行为都热切希望大家的参与。让我们共同成长为飞桨套件的重要 Contributor 🎉🎉🎉。 - - 💡 **新需求征集 | [传送门](https://github.com/PaddlePaddle/PaddleOCR/issues/10334)**:你在日常研究和实践深度学习过程中,有哪些你期望的 feature 亟待实现?请按照格式描述你想实现的 feature 和你提出的初步实现思路,我们会定期沟通与讨论这些需求,并将其纳入未来的版本规划中。 - - 💬 **PP-SIG 技术研讨会 | [传送门](https://github.com/PaddlePaddle/community/tree/master/ppsigs)**:PP-SIG 是飞桨社区开发者由于相同的兴趣汇聚在一起形成的虚拟组织,通过定期召开技术研讨会的方式,分享行业前沿动态、探讨社区需求与技术开发细节、发起社区联合贡献任务。PaddleOCR 希望可以通过 AI 的力量助力任何一位有梦想的开发者实现自己的想法,享受创造价值带来的愉悦。 -- 📑 **项目合作**:如果你有企业中明确的 OCR 垂类应用需求,我们推荐你使用训压推一站式全流程高效率开发平台 PaddleX,助力 AI 技术快速落地。PaddleX 还支持联创开发,利润分成!欢迎广大的个人开发者和企业开发者参与进来,共创繁荣的 AI 技术生态! 
+## 👫 开源社区 +- **📑项目合作:** 如果您是企业开发者且有明确的OCR垂类应用需求,填写[问卷](https://paddle.wjx.cn/vj/QwF7GKw.aspx)后可免费与官方团队展开不同层次的合作。 +- **👫加入社区:** **微信扫描二维码并填写问卷之后,加入交流群领取20G重磅OCR学习大礼包** + - **包括《动手学OCR》电子书** ,配套讲解视频和notebook项目;**PaddleOCR历次发版直播课回放链接**; + - **OCR场景应用模型集合:** 包含数码管、液晶屏、车牌、高精度SVTR模型、手写体识别等垂类模型,覆盖通用,制造、金融、交通行业的主要OCR垂类应用。 + - PDF2Word应用程序;OCR社区优秀开发者项目分享视频。 +- **🏅️社区项目**:[社区项目](./doc/doc_ch/thirdparty.md)文档中包含了社区用户**使用PaddleOCR开发的各种工具、应用**以及**为PaddleOCR贡献的功能、优化的文档与代码**等,是官方为社区开发者打造的荣誉墙,也是帮助优质项目宣传的广播站。 +- **🎁社区常规赛**:社区常规赛是面向OCR开发者的积分赛事,覆盖文档、代码、模型和应用四大类型,以季度为单位评选并发放奖励,赛题详情与报名方法可参考[链接](https://github.com/PaddlePaddle/PaddleOCR/issues/4982)。 +
+ +

PaddleOCR官方交流群二维码

+
## 🛠️ PP-OCR系列模型列表(更新中) -- Gitee From 1f47bca525de0dbdfd9c021d9398038a253a1574 Mon Sep 17 00:00:00 2001 From: andyj <87074272+andyjpaddle@users.noreply.github.com> Date: Wed, 9 Aug 2023 17:16:43 +0800 Subject: [PATCH 109/112] [bug fix] Fix pdf2word in whl (#10585) * add en doc * fix dead link & test=document_fix * fix dead link & test=document_fix * update args default type & test=document_fix * fix doc & test=document_fix * fix pdf2word in whl --- paddleocr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddleocr.py b/paddleocr.py index 409b9334..a65bf510 100644 --- a/paddleocr.py +++ b/paddleocr.py @@ -729,7 +729,7 @@ def main(): logger.info('processing {}/{} page:'.format(index + 1, len(img_paths))) new_img_name = os.path.basename(new_img_path).split('.')[0] - result = engine(new_img_path, img_idx=index) + result = engine(img, img_idx=index) save_structure_res(result, args.output, img_name, index) if args.recovery and result != []: -- Gitee From 9884073e716af23044718d5527b83575d24db8e3 Mon Sep 17 00:00:00 2001 From: Shubham Chambhare <59397280+Shubham654@users.noreply.github.com> Date: Wed, 9 Aug 2023 17:02:16 +0530 Subject: [PATCH 110/112] Fixed Layout recovery README file (#10493) Co-authored-by: Shubham Chambhare --- ppstructure/recovery/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ppstructure/recovery/README.md b/ppstructure/recovery/README.md index 46a348c8..499ef027 100644 --- a/ppstructure/recovery/README.md +++ b/ppstructure/recovery/README.md @@ -152,7 +152,7 @@ cd PaddleOCR/ppstructure # download model mkdir inference && cd inference # Download the detection model of the ultra-lightweight English PP-OCRv3 model and unzip it -https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar && tar xf en_PP-OCRv3_det_infer.tar +wget https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar && tar xf en_PP-OCRv3_det_infer.tar # Download the recognition model of the 
ultra-lightweight English PP-OCRv3 model and unzip it wget https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar && tar xf en_PP-OCRv3_rec_infer.tar # Download the ultra-lightweight English table inch model and unzip it -- Gitee From b9381612365cf4daddb745bee482418f082a4877 Mon Sep 17 00:00:00 2001 From: moehuster Date: Wed, 9 Aug 2023 19:35:26 +0800 Subject: [PATCH 111/112] fix memory leak (#10441) * fix memory leak * update: Using smart pointers instead of raw pointers --- deploy/cpp_infer/include/paddleocr.h | 8 +++--- deploy/cpp_infer/include/paddlestructure.h | 6 ++--- deploy/cpp_infer/src/main.cpp | 4 +-- deploy/cpp_infer/src/paddleocr.cpp | 30 +++++++--------------- deploy/cpp_infer/src/paddlestructure.cpp | 22 ++++++---------- 5 files changed, 26 insertions(+), 44 deletions(-) diff --git a/deploy/cpp_infer/include/paddleocr.h b/deploy/cpp_infer/include/paddleocr.h index 16750a15..85b9d15a 100644 --- a/deploy/cpp_infer/include/paddleocr.h +++ b/deploy/cpp_infer/include/paddleocr.h @@ -23,7 +23,7 @@ namespace PaddleOCR { class PPOCR { public: explicit PPOCR(); - ~PPOCR(); + ~PPOCR() = default; std::vector> ocr(std::vector img_list, bool det = true, @@ -47,9 +47,9 @@ protected: std::vector &ocr_results); private: - DBDetector *detector_ = nullptr; - Classifier *classifier_ = nullptr; - CRNNRecognizer *recognizer_ = nullptr; + std::unique_ptr detector_; + std::unique_ptr classifier_; + std::unique_ptr recognizer_; }; } // namespace PaddleOCR diff --git a/deploy/cpp_infer/include/paddlestructure.h b/deploy/cpp_infer/include/paddlestructure.h index 8478a85c..9ae54f48 100644 --- a/deploy/cpp_infer/include/paddlestructure.h +++ b/deploy/cpp_infer/include/paddlestructure.h @@ -23,7 +23,7 @@ namespace PaddleOCR { class PaddleStructure : public PPOCR { public: explicit PaddleStructure(); - ~PaddleStructure(); + ~PaddleStructure() = default; std::vector structure(cv::Mat img, bool layout = false, @@ -37,8 +37,8 @@ private: std::vector 
time_info_table = {0, 0, 0}; std::vector time_info_layout = {0, 0, 0}; - StructureTableRecognizer *table_model_ = nullptr; - StructureLayoutRecognizer *layout_model_ = nullptr; + std::unique_ptr table_model_; + std::unique_ptr layout_model_; void layout(cv::Mat img, std::vector &structure_result); diff --git a/deploy/cpp_infer/src/main.cpp b/deploy/cpp_infer/src/main.cpp index 0c155dd0..b522a2d1 100644 --- a/deploy/cpp_infer/src/main.cpp +++ b/deploy/cpp_infer/src/main.cpp @@ -82,7 +82,7 @@ void check_params() { } void ocr(std::vector &cv_all_img_names) { - PPOCR ocr = PPOCR(); + PPOCR ocr; if (FLAGS_benchmark) { ocr.reset_timer(); @@ -120,7 +120,7 @@ void ocr(std::vector &cv_all_img_names) { } void structure(std::vector &cv_all_img_names) { - PaddleOCR::PaddleStructure engine = PaddleOCR::PaddleStructure(); + PaddleOCR::PaddleStructure engine; if (FLAGS_benchmark) { engine.reset_timer(); diff --git a/deploy/cpp_infer/src/paddleocr.cpp b/deploy/cpp_infer/src/paddleocr.cpp index 86747c60..e0956474 100644 --- a/deploy/cpp_infer/src/paddleocr.cpp +++ b/deploy/cpp_infer/src/paddleocr.cpp @@ -21,28 +21,28 @@ namespace PaddleOCR { PPOCR::PPOCR() { if (FLAGS_det) { - this->detector_ = new DBDetector( + this->detector_.reset(new DBDetector( FLAGS_det_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_limit_type, FLAGS_limit_side_len, FLAGS_det_db_thresh, FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio, FLAGS_det_db_score_mode, FLAGS_use_dilation, - FLAGS_use_tensorrt, FLAGS_precision); + FLAGS_use_tensorrt, FLAGS_precision)); } if (FLAGS_cls && FLAGS_use_angle_cls) { - this->classifier_ = new Classifier( + this->classifier_.reset(new Classifier( FLAGS_cls_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_cls_thresh, - FLAGS_use_tensorrt, FLAGS_precision, FLAGS_cls_batch_num); + FLAGS_use_tensorrt, FLAGS_precision, FLAGS_cls_batch_num)); } if (FLAGS_rec) { - 
this->recognizer_ = new CRNNRecognizer( + this->recognizer_.reset(new CRNNRecognizer( FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_rec_char_dict_path, FLAGS_use_tensorrt, FLAGS_precision, FLAGS_rec_batch_num, - FLAGS_rec_img_h, FLAGS_rec_img_w); + FLAGS_rec_img_h, FLAGS_rec_img_w)); } -}; +} std::vector> PPOCR::ocr(std::vector img_list, bool det, bool rec, bool cls) { @@ -51,7 +51,7 @@ PPOCR::ocr(std::vector img_list, bool det, bool rec, bool cls) { if (!det) { std::vector ocr_result; ocr_result.resize(img_list.size()); - if (cls && this->classifier_ != nullptr) { + if (cls && this->classifier_) { this->cls(img_list, ocr_result); for (int i = 0; i < img_list.size(); i++) { if (ocr_result[i].cls_label % 2 == 1 && @@ -92,7 +92,7 @@ std::vector PPOCR::ocr(cv::Mat img, bool det, bool rec, img_list.push_back(crop_img); } // cls - if (cls && this->classifier_ != nullptr) { + if (cls && this->classifier_) { this->cls(img_list, ocr_result); for (int i = 0; i < img_list.size(); i++) { if (ocr_result[i].cls_label % 2 == 1 && @@ -190,16 +190,4 @@ void PPOCR::benchmark_log(int img_num) { } } -PPOCR::~PPOCR() { - if (this->detector_ != nullptr) { - delete this->detector_; - } - if (this->classifier_ != nullptr) { - delete this->classifier_; - } - if (this->recognizer_ != nullptr) { - delete this->recognizer_; - } -}; - } // namespace PaddleOCR diff --git a/deploy/cpp_infer/src/paddlestructure.cpp b/deploy/cpp_infer/src/paddlestructure.cpp index b2e35f8c..bde687e2 100644 --- a/deploy/cpp_infer/src/paddlestructure.cpp +++ b/deploy/cpp_infer/src/paddlestructure.cpp @@ -21,20 +21,20 @@ namespace PaddleOCR { PaddleStructure::PaddleStructure() { if (FLAGS_layout) { - this->layout_model_ = new StructureLayoutRecognizer( + this->layout_model_.reset(new StructureLayoutRecognizer( FLAGS_layout_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_layout_dict_path, 
FLAGS_use_tensorrt, FLAGS_precision, FLAGS_layout_score_threshold, - FLAGS_layout_nms_threshold); + FLAGS_layout_nms_threshold)); } if (FLAGS_table) { - this->table_model_ = new StructureTableRecognizer( + this->table_model_.reset(new StructureTableRecognizer( FLAGS_table_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_table_char_dict_path, FLAGS_use_tensorrt, FLAGS_precision, FLAGS_table_batch_num, - FLAGS_table_max_len, FLAGS_merge_no_span_structure); + FLAGS_table_max_len, FLAGS_merge_no_span_structure)); } -}; +} std::vector PaddleStructure::structure(cv::Mat srcimg, bool layout, bool table, bool ocr) { @@ -65,7 +65,7 @@ PaddleStructure::structure(cv::Mat srcimg, bool layout, bool table, bool ocr) { } return structure_results; -}; +} void PaddleStructure::layout( cv::Mat img, std::vector &structure_result) { @@ -123,7 +123,7 @@ void PaddleStructure::table(cv::Mat img, structure_result.cell_box = structure_boxes[i]; structure_result.html_score = structure_scores[i]; } -}; +} std::string PaddleStructure::rebuild_table(std::vector structure_html_tags, @@ -286,10 +286,4 @@ void PaddleStructure::benchmark_log(int img_num) { } } -PaddleStructure::~PaddleStructure() { - if (this->table_model_ != nullptr) { - delete this->table_model_; - } -}; - -} // namespace PaddleOCR \ No newline at end of file +} // namespace PaddleOCR -- Gitee From b1f6c210b3778c2ae32056cba2dd79675ebd14ae Mon Sep 17 00:00:00 2001 From: moehuster Date: Wed, 9 Aug 2023 19:37:47 +0800 Subject: [PATCH 112/112] update: Usinig intuitive initialization of duration (#10512) --- deploy/cpp_infer/src/ocr_cls.cpp | 9 +++------ deploy/cpp_infer/src/ocr_rec.cpp | 9 +++------ 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/deploy/cpp_infer/src/ocr_cls.cpp b/deploy/cpp_infer/src/ocr_cls.cpp index 13a03d6a..6f2b5509 100644 --- a/deploy/cpp_infer/src/ocr_cls.cpp +++ b/deploy/cpp_infer/src/ocr_cls.cpp @@ -20,12 +20,9 @@ void 
Classifier::Run(std::vector img_list, std::vector &cls_labels, std::vector &cls_scores, std::vector &times) { - std::chrono::duration preprocess_diff = - std::chrono::steady_clock::now() - std::chrono::steady_clock::now(); - std::chrono::duration inference_diff = - std::chrono::steady_clock::now() - std::chrono::steady_clock::now(); - std::chrono::duration postprocess_diff = - std::chrono::steady_clock::now() - std::chrono::steady_clock::now(); + std::chrono::duration preprocess_diff = std::chrono::duration::zero(); + std::chrono::duration inference_diff = std::chrono::duration::zero(); + std::chrono::duration postprocess_diff = std::chrono::duration::zero(); int img_num = img_list.size(); std::vector cls_image_shape = {3, 48, 192}; diff --git a/deploy/cpp_infer/src/ocr_rec.cpp b/deploy/cpp_infer/src/ocr_rec.cpp index 96715163..cf3e58d4 100644 --- a/deploy/cpp_infer/src/ocr_rec.cpp +++ b/deploy/cpp_infer/src/ocr_rec.cpp @@ -20,12 +20,9 @@ void CRNNRecognizer::Run(std::vector img_list, std::vector &rec_texts, std::vector &rec_text_scores, std::vector &times) { - std::chrono::duration preprocess_diff = - std::chrono::steady_clock::now() - std::chrono::steady_clock::now(); - std::chrono::duration inference_diff = - std::chrono::steady_clock::now() - std::chrono::steady_clock::now(); - std::chrono::duration postprocess_diff = - std::chrono::steady_clock::now() - std::chrono::steady_clock::now(); + std::chrono::duration preprocess_diff = std::chrono::duration::zero(); + std::chrono::duration inference_diff = std::chrono::duration::zero(); + std::chrono::duration postprocess_diff = std::chrono::duration::zero(); int img_num = img_list.size(); std::vector width_list; -- Gitee