Files
MastersThesis/src/paddle_ocr/scripts/test_dynamic_mode.py
Sergio Jimenez Jimenez 580d1b114b
Some checks failed
build_docker / essential (pull_request) Successful in 0s
build_docker / build_gpu (linux/amd64) (pull_request) Has been cancelled
build_docker / build_doctr_gpu (linux/amd64) (pull_request) Has been cancelled
build_docker / build_doctr_gpu (linux/arm64) (pull_request) Has been cancelled
build_docker / manifest_easyocr_gpu (pull_request) Has been cancelled
build_docker / manifest_doctr_gpu (pull_request) Has been cancelled
build_docker / build_cpu (linux/arm64) (pull_request) Has been cancelled
build_docker / build_cpu (linux/amd64) (pull_request) Successful in 5m0s
build_docker / build_gpu (linux/arm64) (pull_request) Has been cancelled
build_docker / manifest_cpu (pull_request) Has been cancelled
build_docker / manifest_gpu (pull_request) Has been cancelled
build_docker / build_easyocr (linux/amd64) (pull_request) Has been cancelled
build_docker / build_easyocr (linux/arm64) (pull_request) Has been cancelled
build_docker / build_doctr (linux/amd64) (pull_request) Has been cancelled
build_docker / build_doctr (linux/arm64) (pull_request) Has been cancelled
build_docker / manifest_easyocr (pull_request) Has been cancelled
build_docker / manifest_doctr (pull_request) Has been cancelled
build_docker / build_easyocr_gpu (linux/amd64) (pull_request) Has been cancelled
build_docker / build_easyocr_gpu (linux/arm64) (pull_request) Has been cancelled
More docs on gpu for paddle
2026-01-18 07:13:51 +01:00

208 lines
6.0 KiB
Python

#!/usr/bin/env python3
"""
Test PaddleOCR in dynamic graph mode (not inference mode).

Dynamic mode compiles kernels at runtime, which may work on Blackwell.
Inference mode uses pre-compiled kernels, which fail on sm_121.

Usage:
    python test_dynamic_mode.py [image_path]

If image_path is omitted, a default dataset page is used.
"""
import os
import sys
# Both environment variables are assigned before paddle is imported below.
# NOTE(review): presumably they are read at import/initialisation time, so
# this ordering matters - confirm against the Paddle / PaddleOCR docs.
# NOTE(review): presumably skips a model-source check at startup - confirm.
os.environ['DISABLE_MODEL_SOURCE_CHECK'] = 'True'
# Force dynamic graph mode
os.environ['FLAGS_enable_pir_api'] = '0'
import numpy as np
import paddle
from PIL import Image
def check_gpu():
    """Print the active Paddle device and, when a CUDA GPU exists, GPU 0's details."""
    rule = "=" * 60
    print(rule)
    print("GPU STATUS")
    print(rule)

    print(f"Device: {paddle.device.get_device()}")
    print(f"CUDA compiled: {paddle.device.is_compiled_with_cuda()}")

    # Only query device properties when a CUDA build actually sees a GPU.
    gpu_present = (
        paddle.device.is_compiled_with_cuda()
        and paddle.device.cuda.device_count() > 0
    )
    if gpu_present:
        gpu = paddle.device.cuda.get_device_properties(0)
        print(f"GPU: {gpu.name} (sm_{gpu.major}{gpu.minor})")
        # total_memory is divided by 1024**3 to report GiB-scaled figures.
        print(f"Memory: {gpu.total_memory / (1024**3):.1f} GB")

    # Blank separator line before the next section of output.
    print()
def test_paddleocr_dynamic(image_path: str):
"""Test PaddleOCR with dynamic execution."""
print("=" * 60)
print("PADDLEOCR DYNAMIC MODE TEST")
print("=" * 60)
# Import PaddleOCR
from paddleocr import PaddleOCR
# Try to force dynamic mode by setting use_static=False if available
# or by using the model in eval mode directly
print("Creating PaddleOCR instance...")
print("(This may download models on first run)")
try:
# Create OCR instance - this might still use inference internally
ocr = PaddleOCR(
text_detection_model_name='PP-OCRv4_mobile_det',
text_recognition_model_name='PP-OCRv4_mobile_rec',
use_angle_cls=False, # Simplify
lang='es',
)
# Load image
img = Image.open(image_path)
arr = np.array(img)
print(f"Image shape: {arr.shape}")
# Run prediction
print("Running OCR prediction...")
result = ocr.predict(arr)
# Parse results
res = result[0].json['res']
dt_polys = res.get('dt_polys', [])
rec_texts = res.get('rec_texts', [])
print()
print("RESULTS:")
print(f" Detected boxes: {len(dt_polys)}")
print(f" Recognized texts: {len(rec_texts)}")
if rec_texts:
print(f" First 5 texts: {rec_texts[:5]}")
return True
else:
print(" WARNING: No text recognized!")
return False
except Exception as e:
print(f"ERROR: {e}")
return False
def test_paddle_dynamic_model():
    """Run a small Conv -> BN -> ReLU forward pass in dynamic graph mode.

    Returns:
        True if the output statistics are finite and the output is not
        constant; False if the output is constant or contains NaN/inf.
    """
    # Local import: only needed for the finiteness check below.
    import math

    print()
    print("=" * 60)
    print("PADDLE DYNAMIC GRAPH TEST")
    print("=" * 60)

    # Ensure we're in dynamic mode
    paddle.disable_static()

    # Test a simple model forward pass
    print("Testing dynamic graph execution...")

    # Create a simple ResNet-like block
    x = paddle.randn([1, 3, 224, 224])

    # Conv -> BN -> ReLU
    conv = paddle.nn.Conv2D(3, 64, 7, stride=2, padding=3)
    bn = paddle.nn.BatchNorm2D(64)

    # Forward pass (dynamic mode - compiles at runtime)
    y = conv(x)
    y = bn(y)
    y = paddle.nn.functional.relu(y)

    # Reduce once and keep plain Python floats, so the values printed are
    # exactly the values the verdict is based on.
    y_min = y.min().item()
    y_max = y.max().item()
    y_mean = y.mean().item()

    print(f"Input shape: {x.shape}")
    print(f"Output shape: {y.shape}")
    print(f"Output min: {y_min:.4f}")
    print(f"Output max: {y_max:.4f}")
    print(f"Output mean: {y_mean:.4f}")

    # NaN != NaN is True, so a plain min != max comparison would report a
    # NaN-filled output as working. Require finite statistics first.
    if not all(math.isfinite(v) for v in (y_min, y_max, y_mean)):
        print("Dynamic graph mode: BROKEN (non-finite output)")
        return False

    if y_min != y_max:
        print("Dynamic graph mode: WORKING")
        return True

    print("Dynamic graph mode: BROKEN (constant output)")
    return False
def test_ppocr_model_direct():
    """Check whether the ppocr model modules can be imported directly.

    This only probes importability; no model is built or loaded.

    Returns:
        True if all three imports succeed, False if any raises ImportError.
    """
    print()
    print("=" * 60)
    print("PPOCR MODEL DIRECT LOAD TEST")
    print("=" * 60)

    try:
        # Try to import ppocr modules directly
        # This bypasses the inference predictor
        # The imported names are unused on purpose: only availability matters.
        from paddleocr.ppocr.modeling.architectures import build_model  # noqa: F401
        from paddleocr.ppocr.postprocess import build_post_process  # noqa: F401
        from paddleocr.ppocr.utils.save_load import load_model  # noqa: F401
    except ImportError as e:
        print(f"Direct model import not available: {e}")
        print("PaddleOCR may only support inference mode")
        return False

    print("Direct model import available")
    # Note: This approach requires model config files
    # which may or may not be bundled with paddleocr
    # Return an explicit bool so both outcomes have the same type
    # (the success path previously fell through and returned None).
    return True
def main():
    """Run every diagnostic in order, then print a summary and a diagnosis.

    Exits with status 1 when the image is missing or when the basic dynamic
    graph test fails.
    """
    # The first CLI argument, when given, overrides the default test image.
    default_image = '/app/dataset/0/img/page_0001.png'
    image_path = sys.argv[1] if len(sys.argv) > 1 else default_image

    if not os.path.exists(image_path):
        print(f"Image not found: {image_path}")
        sys.exit(1)

    print(f"Testing with image: {image_path}")
    print()

    check_gpu()

    # Test 1: Basic dynamic graph - nothing else is meaningful if this fails.
    dynamic_works = test_paddle_dynamic_model()
    if not dynamic_works:
        print("\nDynamic graph mode is broken - GPU likely unsupported")
        sys.exit(1)

    # Test 2: Direct model load (informational; result is not used)
    test_ppocr_model_direct()

    # Test 3: PaddleOCR pipeline
    ocr_works = test_paddleocr_dynamic(image_path)

    def verdict(ok):
        return 'WORKS' if ok else 'BROKEN'

    rule = "=" * 60
    print()
    print(rule)
    print("SUMMARY")
    print(rule)
    print(f"Dynamic graph mode: {verdict(dynamic_works)}")
    print(f"PaddleOCR pipeline: {verdict(ocr_works)}")

    # The diagnosis only applies when dynamic mode works but the pipeline fails.
    if ocr_works or not dynamic_works:
        return

    diagnosis = (
        "",
        "DIAGNOSIS: Dynamic mode works but PaddleOCR fails.",
        "This means PaddleOCR internally uses inference predictor",
        "which has pre-compiled kernels without Blackwell support.",
        "",
        "Potential solutions:",
        "1. Modify PaddleOCR to use dynamic mode",
        "2. Use ONNX export + ONNXRuntime",
        "3. Wait for PaddlePaddle Blackwell support",
    )
    for line in diagnosis:
        print(line)


if __name__ == '__main__':
    main()