#!/usr/bin/env python3
"""
Debug script for GPU OCR detection issues.

This script tests the raw inference output from PaddlePaddle detection models
to diagnose why detection might fail on certain GPU architectures
(e.g., Blackwell/sm_121).

Usage:
    docker exec paddle-ocr-gpu python /app/debug_gpu_detection.py [image_path]

Expected behavior:
- Working GPU: Output stats should show min close to 0, max close to 1,
  mean ~0.1-0.5
- Broken GPU: Output stats show constant values (e.g., min=max=mean=0.00001)
"""
import os
import sys

# Must be set before paddle/paddleocr are imported, hence placed mid-imports.
os.environ['DISABLE_MODEL_SOURCE_CHECK'] = 'True'

import numpy as np
import paddle
from PIL import Image


def check_gpu_status():
    """Print GPU availability, compute capability, and total memory."""
    print("=" * 60)
    print("GPU STATUS")
    print("=" * 60)
    print(f"Device: {paddle.device.get_device()}")
    print(f"CUDA compiled: {paddle.device.is_compiled_with_cuda()}")
    if paddle.device.is_compiled_with_cuda():
        print(f"GPU count: {paddle.device.cuda.device_count()}")
        if paddle.device.cuda.device_count() > 0:
            props = paddle.device.cuda.get_device_properties(0)
            print(f"GPU name: {props.name}")
            print(f"Compute capability: {props.major}.{props.minor}")
            print(f"Total memory: {props.total_memory / (1024**3):.2f} GB")
    print()


def test_basic_ops():
    """Sanity-check basic GPU tensor ops: creation, conv2d, softmax."""
    print("=" * 60)
    print("BASIC GPU OPERATIONS")
    print("=" * 60)

    # Test tensor creation
    x = paddle.randn([2, 3])
    print(f"Tensor place: {x.place}")

    # Test conv2d
    x = paddle.randn([1, 3, 64, 64])
    conv = paddle.nn.Conv2D(3, 16, 3, padding=1)
    y = conv(x)
    print(f"Conv2d output shape: {y.shape}, place: {y.place}")

    # Test softmax
    s = paddle.nn.functional.softmax(y, axis=1)
    print(f"Softmax output shape: {s.shape}")
    print("Basic operations: OK")
    print()


def test_detection_model(image_path: str):
    """Run the raw detection model and analyze its output statistics.

    A healthy detection model emits a probability map spanning roughly
    [0, 1]; a constant output is the signature of broken GPU inference
    (e.g., unsupported compute capability).

    Args:
        image_path: Path to the test image fed to the model.
    """
    print("=" * 60)
    print("DETECTION MODEL TEST")
    print("=" * 60)
    from paddle.inference import Config, create_predictor

    model_dir = '/root/.paddlex/official_models/PP-OCRv4_mobile_det'
    inference_file = f'{model_dir}/inference.json'
    params_file = f'{model_dir}/inference.pdiparams'

    # FIX: verify both model files exist (previously only the program
    # file was checked, so a partial download crashed later instead of
    # printing the hint below).
    if not (os.path.exists(inference_file) and os.path.exists(params_file)):
        print(f"Model not found at {model_dir}")
        print("Run PaddleOCR once to download models first.")
        return

    # Create config
    config = Config()
    config.set_prog_file(inference_file)
    config.set_params_file(params_file)
    config.enable_use_gpu(1024, 0)  # 1024 MB initial pool on device 0

    print("Creating predictor...")
    predictor = create_predictor(config)

    # Get input/output names
    input_names = predictor.get_input_names()
    output_names = predictor.get_output_names()
    print(f"Input names: {input_names}")
    print(f"Output names: {output_names}")

    # Load and preprocess image.
    # FIX: force 3-channel RGB — PNG inputs are often RGBA or grayscale,
    # which would break the NCHW transpose below (4 channels or a 2-D
    # array with no channel axis).
    img = Image.open(image_path).convert('RGB')
    img = img.resize((640, 640))
    arr = np.array(img).astype('float32')
    arr = arr / 255.0
    arr = arr.transpose(2, 0, 1)[np.newaxis, ...]  # NCHW
    print(f"Input tensor shape: {arr.shape}")

    # Set input
    input_handle = predictor.get_input_handle(input_names[0])
    input_handle.reshape(arr.shape)
    input_handle.copy_from_cpu(arr)

    # Run prediction
    print("Running inference...")
    predictor.run()

    # Get output
    output_handle = predictor.get_output_handle(output_names[0])
    output = output_handle.copy_to_cpu()

    print()
    print("OUTPUT ANALYSIS:")
    print(f"  Shape: {output.shape}")
    print(f"  Min: {output.min():.6f}")
    print(f"  Max: {output.max():.6f}")
    print(f"  Mean: {output.mean():.6f}")
    print(f"  Std: {output.std():.6f}")
    print(f"  Has NaN: {np.isnan(output).any()}")
    print(f"  Has Inf: {np.isinf(output).any()}")

    # Diagnosis. NOTE: branch order is safe with NaNs because NaN
    # comparisons are False, so NaN output falls through to the isnan
    # branch rather than matching min == max.
    print()
    print("DIAGNOSIS:")
    if output.min() == output.max():
        print("  PROBLEM: Output is constant - model inference is broken!")
        print("  This typically indicates GPU compute capability mismatch.")
        print("  GB10 (sm_121) may need CUDA 13.0+ for native support.")
    elif output.max() < 0.01:
        print("  PROBLEM: Output values too low - detection will find nothing.")
    elif np.isnan(output).any() or np.isinf(output).any():
        print("  PROBLEM: Output contains NaN/Inf - numerical instability.")
    else:
        print("  OK: Output values look reasonable.")
        print(f"  Detection threshold typically 0.3-0.6, max output is {output.max():.3f}")


def test_paddleocr_output(image_path: str):
    """Run the full PaddleOCR pipeline and report detection/recognition counts.

    Args:
        image_path: Path to the test image fed to the pipeline.
    """
    print()
    print("=" * 60)
    print("PADDLEOCR PIPELINE TEST")
    print("=" * 60)
    from paddleocr import PaddleOCR

    ocr = PaddleOCR(
        text_detection_model_name='PP-OCRv4_mobile_det',
        text_recognition_model_name='PP-OCRv4_mobile_rec',
    )

    # FIX: force RGB so an RGBA/grayscale PNG yields a 3-channel array,
    # consistent with the raw-model test above.
    img = Image.open(image_path).convert('RGB')
    arr = np.array(img)
    out = ocr.predict(arr)
    res = out[0].json['res']
    dt_polys = res.get('dt_polys', [])
    rec_texts = res.get('rec_texts', [])

    print(f"Detection polygons: {len(dt_polys)}")
    print(f"Recognition texts: {len(rec_texts)}")
    if rec_texts:
        print(f"Sample texts: {rec_texts[:5]}")
    else:
        print("No text detected!")


def main():
    """Resolve the image path from argv (or the default) and run all tests."""
    # Default test image
    image_path = '/app/dataset/0/img/page_0001.png'
    if len(sys.argv) > 1:
        image_path = sys.argv[1]

    if not os.path.exists(image_path):
        print(f"Image not found: {image_path}")
        print("Usage: python debug_gpu_detection.py [image_path]")
        sys.exit(1)

    print(f"Testing with image: {image_path}")
    print()

    check_gpu_status()
    test_basic_ops()
    test_detection_model(image_path)
    test_paddleocr_output(image_path)


if __name__ == '__main__':
    main()