src/paddle_ocr/scripts/test_dynamic_mode.py

#!/usr/bin/env python3
"""
Test PaddleOCR in dynamic graph mode (not inference mode).

Dynamic mode compiles kernels at runtime, which may work on Blackwell.
Inference mode uses pre-compiled kernels which fail on sm_121.

Usage:
    python test_dynamic_mode.py [image_path]
"""

import os
import sys

# Both variables are written to the process environment before `paddle` is
# imported further down, so they are already present when the library loads.
# NOTE(review): presumably this skips a model-source availability check in
# the PaddleOCR stack -- confirm against the library documentation.
os.environ['DISABLE_MODEL_SOURCE_CHECK'] = 'True'
# Force dynamic graph mode
# NOTE(review): the flag name refers to the PIR API; confirm that turning it
# off is what actually selects dynamic graph execution.
os.environ['FLAGS_enable_pir_api'] = '0'

import numpy as np
import paddle
from PIL import Image


def check_gpu():
    """Print a short report about the device Paddle is using.

    The report shows the current Paddle device string and whether Paddle was
    compiled with CUDA. When it was, and at least one CUDA device is
    reported, the name, ``sm_<major><minor>`` label and total memory of
    device 0 are printed as well. Nothing is returned.
    """
    rule = "=" * 60
    print(rule)
    print("GPU STATUS")
    print(rule)

    print(f"Device: {paddle.device.get_device()}")
    cuda_built = paddle.device.is_compiled_with_cuda()
    print(f"CUDA compiled: {cuda_built}")

    # Only query device properties when a CUDA device is actually reported.
    if cuda_built and paddle.device.cuda.device_count() > 0:
        gpu = paddle.device.cuda.get_device_properties(0)
        label = f"{gpu.name} (sm_{gpu.major}{gpu.minor})"
        total_gib = gpu.total_memory / (1024**3)
        print(f"GPU: {label}")
        print(f"Memory: {total_gib:.1f} GB")

    # Trailing blank line separates this report from the next section.
    print()


def test_paddleocr_dynamic(image_path: str):
    """Run the PaddleOCR pipeline on one image and report whether text was read.

    Args:
        image_path: Path of the image file to run OCR on.

    Returns:
        True if at least one text string was recognized. False if nothing
        was recognized, or if any exception was raised while creating the
        pipeline, loading the image, running prediction or parsing the
        result (the error and its traceback are printed in that case).

    Raises:
        ImportError: If ``paddleocr`` cannot be imported; that import is
            performed outside the ``try`` block.
    """
    import traceback

    print("=" * 60)
    print("PADDLEOCR DYNAMIC MODE TEST")
    print("=" * 60)

    # Import PaddleOCR
    from paddleocr import PaddleOCR

    print("Creating PaddleOCR instance...")
    print("(This may download models on first run)")

    try:
        # Create OCR instance - this might still use inference internally
        ocr = PaddleOCR(
            text_detection_model_name='PP-OCRv4_mobile_det',
            text_recognition_model_name='PP-OCRv4_mobile_rec',
            use_angle_cls=False,  # Simplify
            lang='es',
        )

        # Load image. The context manager releases the file handle as soon
        # as the pixels are copied into the array, and the RGB conversion
        # yields an (H, W, 3) array even for grayscale, palette or RGBA
        # files, which would otherwise produce 2-D or 4-channel arrays.
        with Image.open(image_path) as img:
            arr = np.array(img.convert('RGB'))
        print(f"Image shape: {arr.shape}")

        # Run prediction
        print("Running OCR prediction...")
        result = ocr.predict(arr)

        # Parse results: only the first entry of the prediction output is
        # inspected, since a single image was submitted.
        res = result[0].json['res']
        dt_polys = res.get('dt_polys', [])
        rec_texts = res.get('rec_texts', [])

        print()
        print("RESULTS:")
        print(f"  Detected boxes: {len(dt_polys)}")
        print(f"  Recognized texts: {len(rec_texts)}")

        if rec_texts:
            print(f"  First 5 texts: {rec_texts[:5]}")
            return True

        print("  WARNING: No text recognized!")
        return False

    except Exception as e:
        # Deliberately broad: this is a diagnostic script and any failure in
        # the pipeline counts as "OCR does not work". The traceback is
        # printed because the bare message often hides where it failed.
        print(f"ERROR: {e}")
        traceback.print_exc()
        return False


def test_paddle_dynamic_model():
    """Run a small Conv2D -> BatchNorm2D -> ReLU forward pass with Paddle.

    A random ``[1, 3, 224, 224]`` input is pushed through freshly created
    layers after ``paddle.disable_static()`` is called, and the minimum,
    maximum and mean of the output are printed.

    Returns:
        True if the forward pass completed and the output is finite and not
        constant. False if execution raised, if any statistic is NaN or
        infinite, or if the minimum equals the maximum.
    """
    import math
    import traceback

    print()
    print("=" * 60)
    print("PADDLE DYNAMIC GRAPH TEST")
    print("=" * 60)

    try:
        # Ensure we're in dynamic mode
        paddle.disable_static()

        # Test a simple model forward pass
        print("Testing dynamic graph execution...")

        # Create a simple ResNet-like block
        x = paddle.randn([1, 3, 224, 224])

        # Conv -> BN -> ReLU
        conv = paddle.nn.Conv2D(3, 64, 7, stride=2, padding=3)
        bn = paddle.nn.BatchNorm2D(64)

        # Forward pass
        y = conv(x)
        y = bn(y)
        y = paddle.nn.functional.relu(y)

        print(f"Input shape: {x.shape}")
        print(f"Output shape: {y.shape}")

        # Reduce once and keep plain Python floats, so the values printed
        # are exactly the values the verdict below is based on.
        y_min = y.min().item()
        y_max = y.max().item()
        y_mean = y.mean().item()
    except Exception as e:
        # A failing kernel surfaces as an exception rather than as bad
        # numbers; report it as a failed check instead of crashing so the
        # caller can still print its own diagnosis.
        print(f"ERROR: {e}")
        traceback.print_exc()
        print("Dynamic graph mode: BROKEN (execution failed)")
        return False

    print(f"Output min: {y_min:.4f}")
    print(f"Output max: {y_max:.4f}")
    print(f"Output mean: {y_mean:.4f}")

    # NaN compares unequal to everything, itself included, so a NaN-filled
    # output would pass the min != max test below. Reject it explicitly.
    if not all(math.isfinite(v) for v in (y_min, y_max, y_mean)):
        print("Dynamic graph mode: BROKEN (non-finite output)")
        return False

    if y_min != y_max:
        print("Dynamic graph mode: WORKING")
        return True

    print("Dynamic graph mode: BROKEN (constant output)")
    return False


def test_ppocr_model_direct():
    """Probe whether the ``paddleocr.ppocr`` building blocks can be imported.

    Only the imports are attempted (``build_model``, ``build_post_process``
    and ``load_model``); no model is built or loaded. The outcome is printed
    and the function returns False on every path.

    Returns:
        False, always.
    """
    rule = "=" * 60
    print()
    print(rule)
    print("PPOCR MODEL DIRECT LOAD TEST")
    print(rule)

    try:
        # The imported names are not used; a successful import is the probe.
        # NOTE(review): the original comment says this route bypasses the
        # inference predictor -- confirm against the paddleocr sources.
        from paddleocr.ppocr.modeling.architectures import build_model  # noqa: F401
        from paddleocr.ppocr.postprocess import build_post_process  # noqa: F401
        from paddleocr.ppocr.utils.save_load import load_model  # noqa: F401
    except ImportError as err:
        print(f"Direct model import not available: {err}")
        print("PaddleOCR may only support inference mode")
    else:
        # Note: going further would require model config files, which may or
        # may not be bundled with paddleocr.
        print("Direct model import available")

    return False


def main():
    """Run all checks in order and print a summary.

    The image path is taken from the first command-line argument, falling
    back to a built-in default. The process exits with status 1 when the
    path is not an existing file or when the basic dynamic-graph check
    fails. Otherwise the direct-import probe and the PaddleOCR pipeline test
    run, followed by a summary and, if the pipeline failed, a diagnosis.
    """
    # Default test image
    image_path = '/app/dataset/0/img/page_0001.png'
    if len(sys.argv) > 1:
        image_path = sys.argv[1]

    # isfile rather than exists: a directory (or other non-file path) would
    # pass an existence check and only fail later inside the OCR test.
    if not os.path.isfile(image_path):
        print(f"Image not found: {image_path}")
        sys.exit(1)

    print(f"Testing with image: {image_path}")
    print()

    check_gpu()

    # Test 1: Basic dynamic graph
    dynamic_works = test_paddle_dynamic_model()

    if not dynamic_works:
        print("\nDynamic graph mode is broken - GPU likely unsupported")
        sys.exit(1)

    # Test 2: Direct model load (informational only; result is not used)
    test_ppocr_model_direct()

    # Test 3: PaddleOCR pipeline
    ocr_works = test_paddleocr_dynamic(image_path)

    print()
    print("=" * 60)
    print("SUMMARY")
    print("=" * 60)
    print(f"Dynamic graph mode: {'WORKS' if dynamic_works else 'BROKEN'}")
    print(f"PaddleOCR pipeline: {'WORKS' if ocr_works else 'BROKEN'}")

    if dynamic_works and not ocr_works:
        print()
        print("DIAGNOSIS: Dynamic mode works but PaddleOCR fails.")
        print("This means PaddleOCR internally uses inference predictor")
        print("which has pre-compiled kernels without Blackwell support.")
        print()
        print("Potential solutions:")
        print("1. Modify PaddleOCR to use dynamic mode")
        print("2. Use ONNX export + ONNXRuntime")
        print("3. Wait for PaddlePaddle Blackwell support")

# Run the checks only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
More docs on gpu for paddle 2026-01-18 07:13:51 +01:00			`#!/usr/bin/env python3`
			`"""`
			`Test PaddleOCR in dynamic graph mode (not inference mode).`

			`Dynamic mode compiles kernels at runtime, which may work on Blackwell.`
			`Inference mode uses pre-compiled kernels which fail on sm_121.`

			`Usage:`
			`python test_dynamic_mode.py [image_path]`
			`"""`

			`import os`
			`import sys`

			`os.environ['DISABLE_MODEL_SOURCE_CHECK'] = 'True'`
			`# Force dynamic graph mode`
			`os.environ['FLAGS_enable_pir_api'] = '0'`

			`import numpy as np`
			`import paddle`
			`from PIL import Image`


			`def check_gpu():`
			`"""Check GPU status."""`
			`print("=" * 60)`
			`print("GPU STATUS")`
			`print("=" * 60)`
			`print(f"Device: {paddle.device.get_device()}")`
			`print(f"CUDA compiled: {paddle.device.is_compiled_with_cuda()}")`

			`if paddle.device.is_compiled_with_cuda() and paddle.device.cuda.device_count() > 0:`
			`props = paddle.device.cuda.get_device_properties(0)`
			`print(f"GPU: {props.name} (sm_{props.major}{props.minor})")`
			`print(f"Memory: {props.total_memory / (1024**3):.1f} GB")`
			`print()`


			`def test_paddleocr_dynamic(image_path: str):`
			`"""Test PaddleOCR with dynamic execution."""`
			`print("=" * 60)`
			`print("PADDLEOCR DYNAMIC MODE TEST")`
			`print("=" * 60)`

			`# Import PaddleOCR`
			`from paddleocr import PaddleOCR`

			`# Try to force dynamic mode by setting use_static=False if available`
			`# or by using the model in eval mode directly`

			`print("Creating PaddleOCR instance...")`
			`print("(This may download models on first run)")`

			`try:`
			`# Create OCR instance - this might still use inference internally`
			`ocr = PaddleOCR(`
			`text_detection_model_name='PP-OCRv4_mobile_det',`
			`text_recognition_model_name='PP-OCRv4_mobile_rec',`
			`use_angle_cls=False, # Simplify`
			`lang='es',`
			`)`

			`# Load image`
			`img = Image.open(image_path)`
			`arr = np.array(img)`
			`print(f"Image shape: {arr.shape}")`

			`# Run prediction`
			`print("Running OCR prediction...")`
			`result = ocr.predict(arr)`

			`# Parse results`
			`res = result[0].json['res']`
			`dt_polys = res.get('dt_polys', [])`
			`rec_texts = res.get('rec_texts', [])`

			`print()`
			`print("RESULTS:")`
			`print(f" Detected boxes: {len(dt_polys)}")`
			`print(f" Recognized texts: {len(rec_texts)}")`

			`if rec_texts:`
			`print(f" First 5 texts: {rec_texts[:5]}")`
			`return True`
			`else:`
			`print(" WARNING: No text recognized!")`
			`return False`

			`except Exception as e:`
			`print(f"ERROR: {e}")`
			`return False`


			`def test_paddle_dynamic_model():`
			`"""Test loading a paddle model in dynamic graph mode."""`
			`print()`
			`print("=" * 60)`
			`print("PADDLE DYNAMIC GRAPH TEST")`
			`print("=" * 60)`

			`# Ensure we're in dynamic mode`
			`paddle.disable_static()`

			`# Test a simple model forward pass`
			`print("Testing dynamic graph execution...")`

			`# Create a simple ResNet-like block`
			`x = paddle.randn([1, 3, 224, 224])`

			`# Conv -> BN -> ReLU`
			`conv = paddle.nn.Conv2D(3, 64, 7, stride=2, padding=3)`
			`bn = paddle.nn.BatchNorm2D(64)`

			`# Forward pass (dynamic mode - compiles at runtime)`
			`y = conv(x)`
			`y = bn(y)`
			`y = paddle.nn.functional.relu(y)`

			`print(f"Input shape: {x.shape}")`
			`print(f"Output shape: {y.shape}")`
			`print(f"Output min: {y.min().item():.4f}")`
			`print(f"Output max: {y.max().item():.4f}")`
			`print(f"Output mean: {y.mean().item():.4f}")`

			`if y.min() != y.max():`
			`print("Dynamic graph mode: WORKING")`
			`return True`
			`else:`
			`print("Dynamic graph mode: BROKEN (constant output)")`
			`return False`


			`def test_ppocr_model_direct():`
			`"""Try loading PPOCRv4 model directly in dynamic mode."""`
			`print()`
			`print("=" * 60)`
			`print("PPOCR MODEL DIRECT LOAD TEST")`
			`print("=" * 60)`

			`try:`
			`# Try to import ppocr modules directly`
			`# This bypasses the inference predictor`
			`from paddleocr.ppocr.modeling.architectures import build_model`
			`from paddleocr.ppocr.postprocess import build_post_process`
			`from paddleocr.ppocr.utils.save_load import load_model`

			`print("Direct model import available")`

			`# Note: This approach requires model config files`
			`# which may or may not be bundled with paddleocr`

			`except ImportError as e:`
			`print(f"Direct model import not available: {e}")`
			`print("PaddleOCR may only support inference mode")`

			`return False`


			`def main():`
			`# Default test image`
			`image_path = '/app/dataset/0/img/page_0001.png'`
			`if len(sys.argv) > 1:`
			`image_path = sys.argv[1]`

			`if not os.path.exists(image_path):`
			`print(f"Image not found: {image_path}")`
			`sys.exit(1)`

			`print(f"Testing with image: {image_path}")`
			`print()`

			`check_gpu()`

			`# Test 1: Basic dynamic graph`
			`dynamic_works = test_paddle_dynamic_model()`

			`if not dynamic_works:`
			`print("\nDynamic graph mode is broken - GPU likely unsupported")`
			`sys.exit(1)`

			`# Test 2: Direct model load`
			`test_ppocr_model_direct()`

			`# Test 3: PaddleOCR pipeline`
			`ocr_works = test_paddleocr_dynamic(image_path)`

			`print()`
			`print("=" * 60)`
			`print("SUMMARY")`
			`print("=" * 60)`
			`print(f"Dynamic graph mode: {'WORKS' if dynamic_works else 'BROKEN'}")`
			`print(f"PaddleOCR pipeline: {'WORKS' if ocr_works else 'BROKEN'}")`

			`if dynamic_works and not ocr_works:`
			`print()`
			`print("DIAGNOSIS: Dynamic mode works but PaddleOCR fails.")`
			`print("This means PaddleOCR internally uses inference predictor")`
			`print("which has pre-compiled kernels without Blackwell support.")`
			`print()`
			`print("Potential solutions:")`
			`print("1. Modify PaddleOCR to use dynamic mode")`
			`print("2. Use ONNX export + ONNXRuntime")`
			`print("3. Wait for PaddlePaddle Blackwell support")`


			`if __name__ == '__main__':`
			`main()`