#!/usr/bin/env python3
"""
Test PaddleOCR in dynamic graph mode (not inference mode).

Dynamic mode compiles kernels at runtime, which may work on Blackwell.
Inference mode uses pre-compiled kernels, which fail on sm_121.

Usage:
    python test_dynamic_mode.py [image_path]
"""
import os
import sys

os.environ['DISABLE_MODEL_SOURCE_CHECK'] = 'True'
# Force dynamic graph mode
os.environ['FLAGS_enable_pir_api'] = '0'

import numpy as np
import paddle
from PIL import Image


def check_gpu():
    """Check GPU status."""
    print("=" * 60)
    print("GPU STATUS")
    print("=" * 60)
    print(f"Device: {paddle.device.get_device()}")
    print(f"CUDA compiled: {paddle.device.is_compiled_with_cuda()}")
    if paddle.device.is_compiled_with_cuda() and paddle.device.cuda.device_count() > 0:
        props = paddle.device.cuda.get_device_properties(0)
        print(f"GPU: {props.name} (sm_{props.major}{props.minor})")
        print(f"Memory: {props.total_memory / (1024**3):.1f} GB")
    print()


def test_paddleocr_dynamic(image_path: str):
    """Test the full PaddleOCR pipeline."""
    print("=" * 60)
    print("PADDLEOCR DYNAMIC MODE TEST")
    print("=" * 60)

    # Import PaddleOCR
    from paddleocr import PaddleOCR

    # Try to force dynamic mode by setting use_static=False if available,
    # or by using the model in eval mode directly.
    print("Creating PaddleOCR instance...")
    print("(This may download models on first run)")

    try:
        # Create OCR instance - this might still use inference internally
        ocr = PaddleOCR(
            text_detection_model_name='PP-OCRv4_mobile_det',
            text_recognition_model_name='PP-OCRv4_mobile_rec',
            use_angle_cls=False,  # Simplify: skip angle classification
            lang='es',
        )

        # Load image
        img = Image.open(image_path)
        arr = np.array(img)
        print(f"Image shape: {arr.shape}")

        # Run prediction
        print("Running OCR prediction...")
        result = ocr.predict(arr)

        # Parse results
        res = result[0].json['res']
        dt_polys = res.get('dt_polys', [])
        rec_texts = res.get('rec_texts', [])

        print()
        print("RESULTS:")
        print(f"  Detected boxes: {len(dt_polys)}")
        print(f"  Recognized texts: {len(rec_texts)}")
        if rec_texts:
            print(f"  First 5 texts: {rec_texts[:5]}")
            return True
        else:
            print("  WARNING: No text recognized!")
            return False

    except Exception as e:
        print(f"ERROR: {e}")
        return False


def test_paddle_dynamic_model():
    """Test a simple Paddle model forward pass in dynamic graph mode."""
    print()
    print("=" * 60)
    print("PADDLE DYNAMIC GRAPH TEST")
    print("=" * 60)

    # Ensure we're in dynamic mode
    paddle.disable_static()

    # Test a simple model forward pass
    print("Testing dynamic graph execution...")

    # Create a simple ResNet-like block
    x = paddle.randn([1, 3, 224, 224])

    # Conv -> BN -> ReLU
    conv = paddle.nn.Conv2D(3, 64, 7, stride=2, padding=3)
    bn = paddle.nn.BatchNorm2D(64)

    # Forward pass (dynamic mode - kernels compile at runtime)
    y = conv(x)
    y = bn(y)
    y = paddle.nn.functional.relu(y)

    print(f"Input shape: {x.shape}")
    print(f"Output shape: {y.shape}")
    print(f"Output min: {y.min().item():.4f}")
    print(f"Output max: {y.max().item():.4f}")
    print(f"Output mean: {y.mean().item():.4f}")

    if y.min() != y.max():
        print("Dynamic graph mode: WORKING")
        return True
    else:
        print("Dynamic graph mode: BROKEN (constant output)")
        return False


def test_ppocr_model_direct():
    """Try loading the PP-OCRv4 model directly in dynamic mode."""
    print()
    print("=" * 60)
    print("PPOCR MODEL DIRECT LOAD TEST")
    print("=" * 60)

    try:
        # Try to import ppocr modules directly.
        # This bypasses the inference predictor.
        from paddleocr.ppocr.modeling.architectures import build_model
        from paddleocr.ppocr.postprocess import build_post_process
        from paddleocr.ppocr.utils.save_load import load_model
        print("Direct model import available")
        # Note: this approach requires model config files,
        # which may or may not be bundled with paddleocr.
        return True
    except ImportError as e:
        print(f"Direct model import not available: {e}")
        print("PaddleOCR may only support inference mode")
        return False

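
# ---------------------------------------------------------------------------
# Optional sketch (not called by main): relates to "Potential solution 2"
# printed in the summary below. It only illustrates how an ONNXRuntime
# fallback could be probed; it assumes the detection/recognition models have
# already been exported to ONNX (e.g. with the paddle2onnx tool) and that an
# onnxruntime package is installed. The default model path is a placeholder,
# not a file this script provides.
# ---------------------------------------------------------------------------
def sketch_onnxruntime_fallback(onnx_model_path: str = 'det_model.onnx'):
    """Illustrative only: check whether ONNXRuntime could run an exported model on GPU."""
    try:
        import onnxruntime as ort  # assumption: onnxruntime / onnxruntime-gpu is installed
    except ImportError as e:
        print(f"onnxruntime not installed: {e}")
        return False

    # List the execution providers this onnxruntime build supports.
    print(f"Available providers: {ort.get_available_providers()}")

    if not os.path.exists(onnx_model_path):
        print(f"No ONNX model at {onnx_model_path} (export one with paddle2onnx first)")
        return False

    # Prefer the CUDA provider, falling back to CPU if it is unavailable.
    sess = ort.InferenceSession(
        onnx_model_path,
        providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
    )
    print(f"Session providers: {sess.get_providers()}")
    return True
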
available") # Note: This approach requires model config files # which may or may not be bundled with paddleocr except ImportError as e: print(f"Direct model import not available: {e}") print("PaddleOCR may only support inference mode") return False def main(): # Default test image image_path = '/app/dataset/0/img/page_0001.png' if len(sys.argv) > 1: image_path = sys.argv[1] if not os.path.exists(image_path): print(f"Image not found: {image_path}") sys.exit(1) print(f"Testing with image: {image_path}") print() check_gpu() # Test 1: Basic dynamic graph dynamic_works = test_paddle_dynamic_model() if not dynamic_works: print("\nDynamic graph mode is broken - GPU likely unsupported") sys.exit(1) # Test 2: Direct model load test_ppocr_model_direct() # Test 3: PaddleOCR pipeline ocr_works = test_paddleocr_dynamic(image_path) print() print("=" * 60) print("SUMMARY") print("=" * 60) print(f"Dynamic graph mode: {'WORKS' if dynamic_works else 'BROKEN'}") print(f"PaddleOCR pipeline: {'WORKS' if ocr_works else 'BROKEN'}") if dynamic_works and not ocr_works: print() print("DIAGNOSIS: Dynamic mode works but PaddleOCR fails.") print("This means PaddleOCR internally uses inference predictor") print("which has pre-compiled kernels without Blackwell support.") print() print("Potential solutions:") print("1. Modify PaddleOCR to use dynamic mode") print("2. Use ONNX export + ONNXRuntime") print("3. Wait for PaddlePaddle Blackwell support") if __name__ == '__main__': main()