目錄
- 重點是四 先用 PPStructure 版面分析,分成不同的塊兒,再選用 PaddleOCR、或PPStructure
- 基礎路徑
- OCR模型配置
- OCR模型配置
- GPU配置
- 硬件配置
- 性能配置
- 一、框架選型對比分析
- 1. **PaddleOCR核心能力**
- 2. **PP-Structure核心能力**
- 3. **選型結論**
- 二、錯誤根因分析與修復方案
- 1. **錯誤現象**
- 2. **深層原因**
- 3. **修復方案**
- 三、性能優化建議
- 1. **GPU資源利用**
- 2. **內存管理**
- 3. **模型裁剪**
- 四、架構設計升級建議
- 1. **流程重組**
- 2. **模塊解耦**
- 五、關鍵參數調優指南
重點是四 先用 PPStructure 版面分析,分成不同的塊兒,再選用 PaddleOCR、或PPStructure
https://chat.deepseek.com/a/chat/s/1dafb23d-d230-48a6-b3a1-72569eb1c7cd
我有解析PDF的程序如下:
1、配置
"""
OCR processing utilities.

Centralises initialisation, invocation and resource cleanup of the OCR engines.
"""
import multiprocessing
import os
from pathlib import Path
from typing import Tuple, Optional, Dict, Any

import paddle
from paddleocr import PaddleOCR, PPStructure

from .GPUResource import GPUResourceController
from ...utils.logutil import get_local_logger

logger = get_local_logger()

# Base paths: model directories are resolved relative to this file's
# grandparent directory.
BASE_DIR = Path(__file__).parent.parent
MODEL_DIR = BASE_DIR / "common/models"
logger.info(f" >>>>>> MODEL_DIR路徑: {MODEL_DIR}")

# Value handed to PaddleOCR as its `page_num` argument.
START_PAGE = 6

# OCR model configuration: every model path is given explicitly.
OCR_CONFIG = {
    "det_model_dir": str(MODEL_DIR / "ch_PP-OCRv4_det_infer"),
    "rec_model_dir": str(MODEL_DIR / "ch_PP-OCRv4_rec_infer"),
    "cls_model_dir": str(MODEL_DIR / "ch_ppocr_mobile_v2.0_cls_infer"),
    "table_model_dir": str(MODEL_DIR / "en_ppstructure_mobile_v2.0_table_structure_infer"),
    "layout_model_dir": str(MODEL_DIR / "en_ppstructure_mobile_v2.0_layout_infer"),
    "lang": "ch",
    "ocr_version": "PP-OCRv4",
    "table_version": "PP-StructureV2",
}

# GPU configuration
GPU_CONFIG = {
    "enable_gpu": True,
    "max_gpus": 2,
    "memory_fraction": 0.5,
}

# Hardware configuration
GPU_MEMORY_LIMIT = 0.8  # maximum share of GPU memory for a single process
CPU_THREADS = 4  # thread count in CPU mode

# Performance configuration
PRECISION_MODE = "fp16" if paddle.is_compiled_with_cuda() else "fp32"
ENABLE_TENSORRT = True

use_gpu_flag = paddle.device.is_compiled_with_cuda()
# NOTE(review): the labels below say "torch", but both values come from paddle.
print(f"torch.cuda.is_available(){use_gpu_flag}")
print(f"torch.version{paddle.device.get_cudnn_version()}")
class OCREngineInitError(Exception):
    """Raised when an OCR or table engine cannot be initialised."""
class OCRConfig:
    """Per-process holder for the OCR engine and the table/layout engine.

    Constructing an instance picks a device (a GPU chosen from
    ``process_idx``, otherwise the CPU) and builds both engines right away.
    The instance can also be used as a context manager: entering re-applies
    the device settings, leaving releases GPU resources and reports the
    outcome to ``GPUResourceController``.
    """

    def __init__(self, process_idx: int = 0):
        """Select a device and build both engines.

        :param process_idx: Process index, used to spread processes over GPUs.
        :raises OCREngineInitError: If either engine fails to initialise.
        """
        self.process_idx = process_idx
        self.gpu_id = -1  # -1 means CPU mode
        self.ocr_engine = None
        self.table_engine = None
        self._init_gpu()
        self._init_ocr_engine()
        self._init_table_engine()

    def __enter__(self):
        """Apply the device settings, make sure both engines exist, return self."""
        self._set_hardware_environment()
        self._init_engines()
        return self

    def __exit__(self, exc_type, *_):
        """Release GPU resources and report whether the body raised."""
        self._cleanup_resources()
        self._report_gpu_status(exc_type is None)

    def _init_engines(self) -> None:
        """Build whichever engine is still missing.

        ``__init__`` already creates both engines, so this normally does
        nothing; it exists so that ``__enter__`` has a defined hook to call.
        """
        if self.ocr_engine is None:
            self._init_ocr_engine()
        if self.table_engine is None:
            self._init_table_engine()

    def _set_hardware_environment(self):
        """Point Paddle at the selected device and set GPU memory flags."""
        on_gpu = self.gpu_id != -1
        os.environ["CUDA_VISIBLE_DEVICES"] = str(self.gpu_id) if on_gpu else ""
        # NOTE(review): this selects "gpu:0" while _init_gpu selects
        # f"gpu:{gpu_id}"; which index is valid depends on whether the
        # CUDA_VISIBLE_DEVICES change takes effect in an already running
        # process - confirm the intended behaviour.
        paddle.set_device("gpu:0" if on_gpu else "cpu")
        if on_gpu:
            paddle.set_flags({
                'FLAGS_fraction_of_gpu_memory_to_use': GPU_MEMORY_LIMIT,
                'FLAGS_allocator_strategy': 'auto_growth',
            })

    def _init_gpu(self) -> None:
        """Choose the device for this process; leaves ``gpu_id`` at -1 on CPU."""
        if not GPU_CONFIG["enable_gpu"] or not paddle.is_compiled_with_cuda():
            logger.info(f" paddle.is_compiled_with_cuda: {paddle.device.is_compiled_with_cuda()}, 即將運行在CPU模式")
            paddle.set_device("cpu")
            return

        num_gpus = paddle.device.cuda.device_count()
        if num_gpus == 0:
            logger.warning(f" num_gpus: {num_gpus}, 沒有檢測到可用的GUP, 即將運行在CPU模式")
            # gpu_id is still -1 (CPU mode); select the CPU device explicitly,
            # as the branch above does.
            paddle.set_device("cpu")
            return

        # Distribute processes over the available GPUs round-robin.
        self.gpu_id = self.process_idx % num_gpus
        os.environ["CUDA_VISIBLE_DEVICES"] = str(self.gpu_id)
        paddle.set_device(f"gpu:{self.gpu_id}")
        logger.info(f"初始化GPU:{self.gpu_id} | 進程:{self.process_idx}")
        # Cap the share of GPU memory this process may allocate.
        paddle.set_flags({'FLAGS_fraction_of_gpu_memory_to_use': GPU_CONFIG["memory_fraction"]})

    def _init_ocr_engine(self) -> None:
        """Create the PaddleOCR engine.

        :raises OCREngineInitError: If PaddleOCR construction fails.
        """
        try:
            self.ocr_engine = PaddleOCR(
                use_angle_cls=True,
                lang=OCR_CONFIG["lang"],
                page_num=START_PAGE,
                use_gpu=self.gpu_id != -1,
                gpu_id=self.gpu_id,
                # All model paths come from OCR_CONFIG so both engines agree.
                table_model_dir=OCR_CONFIG["table_model_dir"],
                det_model_dir=OCR_CONFIG["det_model_dir"],
                rec_model_dir=OCR_CONFIG["rec_model_dir"],
                cls_model_dir=OCR_CONFIG["cls_model_dir"],
                table_version=OCR_CONFIG["table_version"],
                show_log=False,
                # Performance options
                enable_mkldnn=True,  # Intel CPU acceleration
                use_tensorrt=False,  # NVIDIA GPU acceleration (disabled)
                precision="fp16",  # mixed-precision inference
            )
            print(f" OCR引擎初始化成功 [PID:{os.getpid()}] Assigned to GPU {self.gpu_id} (Process index: {self.process_idx})")
        except Exception as e:
            logger.error(f"OCR引擎初始化失敗: {str(e)}")
            raise OCREngineInitError("OCR初始化失敗") from e

    def _init_table_engine(self) -> None:
        """Create the PPStructure table/layout engine.

        :raises OCREngineInitError: If PPStructure construction fails.
        """
        try:
            self.table_engine = PPStructure(
                table_model_dir=OCR_CONFIG["table_model_dir"],
                layout_model_dir=OCR_CONFIG["layout_model_dir"],
                table_version=OCR_CONFIG["table_version"],
                ocr=False,
                # Follow the device actually selected, so a CPU fallback
                # (gpu_id == -1) does not request a GPU.
                use_gpu=self.gpu_id != -1,
                gpu_id=self.gpu_id,
                show_log=False,
            )
            logger.info("表格引擎初始化成功")
        except Exception as e:
            logger.error(f"表格引擎初始化失敗: {str(e)}")
            raise OCREngineInitError("表格引擎初始化失敗") from e

    def _cleanup_resources(self):
        """Empty Paddle's CUDA cache when running on a GPU."""
        if self.gpu_id != -1:
            paddle.device.cuda.empty_cache()
            logger.info(f"已釋放GPU:{self.gpu_id}資源")

    def _report_gpu_status(self, success: bool):
        """Report the outcome for this GPU to ``GPUResourceController``.

        :param success: True when the managed block finished without raising.
        """
        # NOTE(review): relies on GPUResourceController.release_gpu, which is
        # not visible in this file - confirm it exists in .GPUResource.
        if self.gpu_id != -1:
            GPUResourceController().release_gpu(self.gpu_id, success)
class OCRManager:
    """Registry that keeps one ``OCRConfig`` per process key."""

    # Created engines, keyed by process index / PID.
    _engines = {}

    @classmethod
    def get_engine(cls, process_idx: Optional[int] = None) -> "OCRConfig":
        """Return the engine holder for ``process_idx``, creating it on first use.

        :param process_idx: Key for the engine. Any falsy value (``None`` or
            ``0``) is replaced by the current PID.
        :raises RuntimeError: If the engines cannot be initialised.
        """
        if not process_idx:
            process_idx = os.getpid()
        if process_idx not in cls._engines:
            try:
                cls._engines[process_idx] = OCRConfig(process_idx)
            except OCREngineInitError as e:
                raise RuntimeError(f"無法為進程{process_idx}創建引擎") from e
        return cls._engines[process_idx]


# NOTE(review): the class header for the members below was missing from the
# source; the name is reconstructed from `from .GPUResource import
# GPUResourceController` and its use in OCRConfig - confirm.
class GPUResourceController:
    """GPU resource pool that maps processes to GPU ids.

    Core behaviour:
    - detects the available GPUs;
    - assigns a GPU id per process;
    - guards pool access with a ``multiprocessing.Lock``.

    NOTE(review): the lock is created at class level, so whether it is shared
    between processes depends on how they are started - confirm.
    """

    # Detected GPU ids, shared at class level.
    _gpu_ring = []
    _lock = multiprocessing.Lock()

    def __init__(self):
        self._init_gpu_pool()

    def _init_gpu_pool(self):
        """Populate the pool once, under the lock."""
        with self._lock:
            if self._gpu_ring:
                return
            if paddle.is_compiled_with_cuda():
                num_gpus = paddle.device.cuda.device_count()
                # Store on the class so later instances reuse the detected
                # pool instead of probing the devices again.
                type(self)._gpu_ring = list(range(num_gpus)) if num_gpus > 0 else []
            logger.info(f"GPU資源池初始化完成: {self._gpu_ring}")

    def acquire_gpu(self, process_id: int) -> int:
        """Return the GPU id assigned to ``process_id``, or -1 when the pool is empty."""
        with self._lock:
            if not self._gpu_ring:
                return -1
            return self._gpu_ring[process_id % len(self._gpu_ring)]
2、處理程序:
def _process_ocr_and_table(self, img_path, page_idx:int):
# 假設這是你的 OCR 和表格識別邏輯
try:
“”“使用進程級引擎處理”“”
ocr_config = OCRManager.get_engine(os.getpid())
ocr_engine = ocr_config.ocr_engine
table_engine = ocr_config.table_engine
# OCR識別ocr_output = ocr_engine.ocr(img_path)if not ocr_output:raise ValueError(" ocr_output OCR未識別到內容")logger.info(f" 第{page_idx}頁OCR識別結果,類型:{type(ocr_output)}, \n 內容示例:{ocr_output}")# 解析OCR結果:每個元素的結構為 [box_coords, (text, score)]boxes = [line[0] for line in ocr_output] # 提取所有框坐標txts = [line[1][0] for line in ocr_output] # 提取所有文本