# Paddle OCR

# 偵測文字將其框起來

顯示被框起來的文本、座標

```
from paddleocr import PaddleOCR, draw_ocr
import os
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
# Initialise PaddleOCR (Chinese model, text-angle classifier enabled).
ocr = PaddleOCR(use_angle_cls=True, lang="ch")
# Path of the image to analyse.
img_path = '15.jpg'
img_name = os.path.basename(img_path)
# Run OCR on the image.
result_det = ocr.ocr(img_path, det=True)
for res in result_det:
    # ocr.ocr() may hand back None for an image in which no text was found;
    # iterating over it would raise TypeError.
    if res is None:
        continue
    for line in res:
        # line[1][0] is printed as the text, line[0][0:4] as its coordinates.
        print("文本:")
        print(line[1][0])
        print("座標:")
        print(line[0][0:4])
# Visualise the detected boxes and save the picture.
image = Image.open(img_path).convert('RGB')
first_result = result_det[0] if result_det else None
boxes = [line[0] for line in (first_result or [])]
im_show = draw_ocr(image, boxes, font_path='./fonts/simfang.ttf')
im_show = Image.fromarray(im_show)
# Image.save() does not create missing directories, so make sure it exists.
os.makedirs('result', exist_ok=True)
im_show.save('result/{}only_detection.jpg'.format(os.path.splitext(img_name)[0]))
plt.figure(figsize=(15, 8))
plt.imshow(im_show)
plt.show()
# Only the boxes are drawn here; no text or score is passed to draw_ocr.
```
# 偵測文字將其框起來-PDF版

```
from paddleocr import PaddleOCR, draw_ocr
import fitz
from PIL import Image
import cv2
import numpy as np
import os

PAGE_NUM = 1  # number of PDF pages to process
pdf_path = 'test.pdf'
ocr = PaddleOCR(use_angle_cls=True, lang="ch", page_num=PAGE_NUM)  # need to run only once to download and load model into memory
# ocr = PaddleOCR(use_angle_cls=True, lang="ch", page_num=PAGE_NUM,use_gpu=0)  # To use GPU, uncomment this line and comment out the one above.
result = ocr.ocr(pdf_path, cls=True)
for res in result:
    # A page without any detected text is reported as None (the drawing loop
    # below guards against it as well); skip it instead of crashing.
    if res is None:
        continue
    for line in res:
        # line[1][0] is printed as the text, line[0][0:4] as its coordinates.
        print("文本:")
        print(line[1][0])
        print("座標:")
        print(line[0][0:4])
# Render the PDF pages to images so the boxes can be drawn on them.
imgs = []
with fitz.open(pdf_path) as pdf:
    for pg in range(0, PAGE_NUM):
        page = pdf.load_page(pg)
        mat = fitz.Matrix(2, 2)
        pm = page.get_pixmap(matrix=mat, alpha=False)
        # if width or height > 2000 pixels, don't enlarge the image
        if pm.width > 2000 or pm.height > 2000:
            pm = page.get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False)
        img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples)
        img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
        imgs.append(img)
# Image.save() does not create missing directories, so make sure it exists.
os.makedirs('result_pdf', exist_ok=True)
for idx, res in enumerate(result):
    if res is None:
        print(f"[DEBUG] Empty page {idx} detected, skip it.")
        continue
    image = imgs[idx]
    boxes = [line[0] for line in res]
    # txts = [line[1][0] for line in res]    # recognised text
    # scores = [line[1][1] for line in res]  # confidence score
    im_show = draw_ocr(image, boxes, font_path='simfang.ttf')
    im_show = Image.fromarray(im_show)
    im_show.save('result_pdf/result_pdf_detection_{}.jpg'.format(idx))
# PDF text detection (boxes only).
```
# PDF 全程體驗 圖片上顯示含文字及準確度

```
from paddleocr import PaddleOCR, draw_ocr
import fitz
from PIL import Image
import cv2
import numpy as np
import os

PAGE_NUM = 1  # Set the recognition page number
pdf_path = 'test.pdf'
ocr = PaddleOCR(use_angle_cls=True, lang="ch", page_num=PAGE_NUM)  # need to run only once to download and load model into memory
# ocr = PaddleOCR(use_angle_cls=True, lang="ch", page_num=PAGE_NUM,use_gpu=0)  # To use GPU, uncomment this line and comment out the one above.
result = ocr.ocr(pdf_path, cls=True)
# Render the PDF pages to images so the results can be drawn on them.
imgs = []
with fitz.open(pdf_path) as pdf:
    for pg in range(0, PAGE_NUM):
        page = pdf.load_page(pg)  # page index starts at 0
        mat = fitz.Matrix(2, 2)
        pm = page.get_pixmap(matrix=mat, alpha=False)
        # if width or height > 2000 pixels, don't enlarge the image
        if pm.width > 2000 or pm.height > 2000:
            pm = page.get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False)
        img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples)
        img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
        imgs.append(img)
# Image.save() does not create missing directories, so make sure it exists.
os.makedirs('result_pdf', exist_ok=True)
for idx, res in enumerate(result):
    if res is None:
        print(f"[DEBUG] Empty page {idx} detected, skip it.")
        continue
    image = imgs[idx]
    # Each entry is read as (box, (text, score)).
    boxes = [line[0] for line in res]
    txts = [line[1][0] for line in res]
    scores = [line[1][1] for line in res]
    im_show = draw_ocr(image, boxes, txts, scores, font_path='simfang.ttf')
    im_show = Image.fromarray(im_show)
    im_show.save('result_pdf/result_pdf_{}.jpg'.format(idx))
# PDF end-to-end run: boxes, text and scores are all passed to draw_ocr.
```
# 只抓取特定文字

```
from paddleocr import PaddleOCR, draw_ocr
import os
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
# Initialise PaddleOCR (Chinese model, text-angle classifier enabled).
ocr = PaddleOCR(use_angle_cls=True, lang="ch")
# Path of the image to analyse.
img_path = '15.jpg'
img_name = os.path.basename(img_path)
# The fixed text to look for.
target_text = "冬阳君>"
# Run OCR on the image.
result_det = ocr.ocr(img_path, det=True)
# ocr.ocr() may hand back None for an image in which no text was found.
first_result = result_det[0] if result_det else None
# Substring match: a recognised line is kept when it contains the target,
# so the target is also found when it is only part of a longer line.
target_boxes = [
    res[0]
    for res in (first_result or [])
    if target_text in res[1][0]
]
# Visualise the matching boxes and save the picture.
image = Image.open(img_path).convert('RGB')
im_show = draw_ocr(image, target_boxes, font_path='./fonts/simfang.ttf')
im_show = Image.fromarray(im_show)
# Image.save() does not create missing directories, so make sure it exists.
os.makedirs('result', exist_ok=True)
im_show.save('result/{}only_detection.jpg'.format(os.path.splitext(img_name)[0]))
plt.figure(figsize=(15, 8))
plt.imshow(im_show)
plt.show()
```
# 只抓取特定文字-PDF

```
from paddleocr import PaddleOCR, draw_ocr
import fitz
from PIL import Image
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os

PAGE_NUM = 1  # number of PDF pages to process
pdf_path = 'test.pdf'
ocr = PaddleOCR(use_angle_cls=True, lang="ch", page_num=PAGE_NUM)
# The fixed text to look for.
target_text = "Sharon"
# Run OCR on the PDF.
result = ocr.ocr(pdf_path, cls=True)
# Render the PDF pages to images so the boxes can be drawn on them.
imgs = []
with fitz.open(pdf_path) as pdf:
    for pg in range(0, PAGE_NUM):
        page = pdf.load_page(pg)
        mat = fitz.Matrix(2, 2)
        pm = page.get_pixmap(matrix=mat, alpha=False)
        # if width or height > 2000 pixels, don't enlarge the image
        if pm.width > 2000 or pm.height > 2000:
            pm = page.get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False)
        img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples)
        img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
        imgs.append(img)
# Image.save() does not create missing directories, so make sure it exists.
os.makedirs('result_pdf', exist_ok=True)
for idx, res in enumerate(result):
    if res is None:
        print(f"[DEBUG] Empty page {idx} detected, skip it.")
        continue
    image = imgs[idx]
    # Substring match: a recognised line is kept when it contains the target,
    # so "Sharon" is also found inside a longer line. The box that gets drawn
    # is the box of that whole recognised line.
    target_boxes = [line[0] for line in res if target_text in line[1][0]]
    # Show the result only for pages that actually contain the target.
    if target_boxes:
        im_show = draw_ocr(image, target_boxes, font_path='simfang.ttf')
        im_show = Image.fromarray(im_show)
        im_show.save('result_pdf/result_pdf_detection_{}.jpg'.format(idx))
        plt.figure(figsize=(15, 8))
        plt.imshow(im_show)
        plt.show()
```
**如果要直接找出某段文字的座標，直接用 `line[1][0]` 判斷文字是否符合，符合就輸出該行的座標 `line[0]`。**

-------------------------
問題

如果要抓取特定文字 "Sharon"，當該行只有 Sharon 時沒問題；但如果 Sharon 只是該行的一部分（例如「交給Sharon掃描歸檔」），用 `==` 做完全比對就不會偵測到。改用 `target_text in text` 做子字串比對即可抓到，此時框起來的是包含該文字的整行。

-------------------------
# PaddleOCR-參考資料
- [官方github](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.7?tab=readme-ov-file)
- [PaddleOCR 快速开始](https://github.com/PaddlePaddle/PaddleOCR/blob/main/doc/doc_ch/quickstart.md)
- [官方PaddlePaddle文檔](https://www.paddlepaddle.org.cn/tutorials/projectdetail/5603475)
- [PPOCRv3的det检测模型finetune训练](https://blog.csdn.net/z5z5z5z56/article/details/129377434)

-------------------------
# PP-Structure
PPstructure
PP-structure 图像方向分类+版面分析+表格识别
原圖:

structure生成 xlsx

```
import os
import cv2
from paddleocr import PPStructure,save_structure_res
from paddleocr.ppstructure.recovery.recovery_to_doc import sorted_layout_boxes, convert_info_docx
# Engine for Chinese test images.
table_engine = PPStructure(recovery=True)
# For English test images use this instead:
# table_engine = PPStructure(recovery=True, lang='en')
save_folder = 'output'
img_path = 'demo01.jpg'
img = cv2.imread(img_path)
# cv2.imread() returns None instead of raising when the file cannot be read;
# fail here with a clear message rather than deep inside the engine.
if img is None:
    raise FileNotFoundError(f"Cannot read image: {img_path}")
# File name without extension, used as the name of the saved results.
# splitext keeps dots that are part of the name (e.g. "scan.v2.jpg" -> "scan.v2").
img_stem = os.path.splitext(os.path.basename(img_path))[0]
result = table_engine(img)
save_structure_res(result, save_folder, img_stem)
for line in result:
    # Drop the 'img' entry of each region before printing it.
    line.pop('img')
    print(line)
h, w, _ = img.shape
res = sorted_layout_boxes(result, w)
convert_info_docx(img, res, save_folder, img_stem)
# Layout recovery.
```
原圖:

structure生成 xlsx

PDF 版面恢復-to word

```
import os
def run_paddleocr(image_dir, output_dir):
    """Run the ``paddleocr`` command line tool in structure/recovery mode.

    Args:
        image_dir: Path of the input handed to ``--image_dir`` (a PDF in the
            example below).
        output_dir: Directory handed to ``--output`` for the results.

    Returns:
        The exit code of the ``paddleocr`` process (0 on success).

    Raises:
        FileNotFoundError: If the ``paddleocr`` executable is not on PATH.
    """
    import subprocess

    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact.
    command = [
        "paddleocr",
        f"--image_dir={image_dir}",
        "--type=structure",
        "--recovery=true",
        "--use_pdf2docx_api=true",
        f"--output={output_dir}",
    ]
    return subprocess.run(command, check=False).returncode


if __name__ == "__main__":
    image_dir = "demo02.pdf"  # pdf
    run_paddleocr(image_dir, output_dir="output/")
```
# PP-structure-參考資料
- [github](https://github.com/PaddlePaddle/PaddleOCR/blob/main/ppstructure/README_ch.md)
- [PP-Structure工具包:PDF图片表格一键提取解决方案](https://aistudio.baidu.com/projectdetail/2274897)

--------------------------------------
# PP-structure-关键信息抽取
原圖:


[github](https://github.com/PaddlePaddle/PaddleOCR/blob/main/ppstructure/kie/README_ch.md)
根據上面的github下載模型和套件
關鍵信息抽取-RE-cmd指令
```
# Runs kie/predict_kie_token_ser_re.py with the LayoutXLM SER and RE inference models.
# NOTE(review): the relative paths (../inference, ../doc/fonts) suggest this is run from the ppstructure/ directory — confirm.
# NOTE(review): "圖片" in --image_dir is presumably a placeholder for the real image file name — replace it before running.
python kie/predict_kie_token_ser_re.py --kie_algorithm=LayoutXLM --re_model_dir=../inference/re_vi_layoutxlm_xfund_infer --ser_model_dir=../inference/ser_vi_layoutxlm_xfund_infer --use_visual_backbone=False --image_dir=./docs/kie/input/"圖片" --ser_dict_path=../train_data/XFUND/class_list_xfun.txt --vis_font_path=../doc/fonts/simfang.ttf --ocr_order_method="tb-yx"
```