Files
MastersThesis/src/paddle_ocr/scripts/debug_gpu_detection.py
sergio c7ed7b2b9c
All checks were successful
build_docker / essential (push) Successful in 0s
build_docker / build_cpu (push) Successful in 5m0s
build_docker / build_gpu (push) Successful in 22m55s
build_docker / build_easyocr (push) Successful in 18m47s
build_docker / build_easyocr_gpu (push) Successful in 19m0s
build_docker / build_raytune (push) Successful in 3m27s
build_docker / build_doctr (push) Successful in 19m42s
build_docker / build_doctr_gpu (push) Successful in 14m49s
Paddle ocr, easyocr and doctr gpu support. (#4)
2026-01-19 17:35:24 +00:00

200 lines
6.0 KiB
Python

#!/usr/bin/env python3
"""
Debug script for GPU OCR detection issues.
This script tests the raw inference output from PaddlePaddle detection models
to diagnose why detection might fail on certain GPU architectures (e.g., Blackwell/sm_121).
Usage:
docker exec paddle-ocr-gpu python /app/debug_gpu_detection.py [image_path]
Expected behavior:
- Working GPU: Output stats should show min close to 0, max close to 1, mean ~0.1-0.5
- Broken GPU: Output stats show constant values (e.g., min=max=mean=0.00001)
"""
import os
import sys
# Must be set BEFORE importing paddle/paddleocr so model-source validation
# is skipped at import time (offline / air-gapped containers).
os.environ['DISABLE_MODEL_SOURCE_CHECK'] = 'True'
import numpy as np
import paddle
from PIL import Image
def check_gpu_status():
    """Print whether CUDA is available and, if so, basic device properties."""
    banner = "=" * 60
    print(banner)
    print("GPU STATUS")
    print(banner)
    print(f"Device: {paddle.device.get_device()}")
    print(f"CUDA compiled: {paddle.device.is_compiled_with_cuda()}")
    if paddle.device.is_compiled_with_cuda():
        gpu_count = paddle.device.cuda.device_count()
        print(f"GPU count: {gpu_count}")
        if gpu_count > 0:
            # Report device 0 only: this debug container exposes a single GPU.
            props = paddle.device.cuda.get_device_properties(0)
            print(f"GPU name: {props.name}")
            print(f"Compute capability: {props.major}.{props.minor}")
            print(f"Total memory: {props.total_memory / (1024**3):.2f} GB")
    print()
def test_basic_ops():
    """Exercise a few elementary tensor ops to confirm the device works at all."""
    sep = "=" * 60
    print(sep)
    print("BASIC GPU OPERATIONS")
    print(sep)
    # Simple tensor creation on the default device.
    sample = paddle.randn([2, 3])
    print(f"Tensor place: {sample.place}")
    # One convolution forward pass (3 -> 16 channels, 3x3 kernel).
    batch = paddle.randn([1, 3, 64, 64])
    conv_layer = paddle.nn.Conv2D(3, 16, 3, padding=1)
    feature_map = conv_layer(batch)
    print(f"Conv2d output shape: {feature_map.shape}, place: {feature_map.place}")
    # Softmax across the channel axis.
    probs = paddle.nn.functional.softmax(feature_map, axis=1)
    print(f"Softmax output shape: {probs.shape}")
    print("Basic operations: OK")
    print()
def test_detection_model(image_path: str):
    """Run the raw PP-OCRv4 detection model and analyze its output statistics.

    Args:
        image_path: Path to an image file to feed through the detector.
            Caller is expected to have verified the file exists.

    Prints min/max/mean/std of the raw probability map and a diagnosis of
    whether inference looks healthy on this GPU (constant output typically
    means a compute-capability mismatch).
    """
    print("=" * 60)
    print("DETECTION MODEL TEST")
    print("=" * 60)
    from paddle.inference import Config, create_predictor
    model_dir = '/root/.paddlex/official_models/PP-OCRv4_mobile_det'
    inference_file = f'{model_dir}/inference.json'
    params_file = f'{model_dir}/inference.pdiparams'
    if not os.path.exists(inference_file):
        print(f"Model not found at {model_dir}")
        print("Run PaddleOCR once to download models first.")
        return
    # Build the inference config pointing at the downloaded model files.
    config = Config()
    config.set_prog_file(inference_file)
    config.set_params_file(params_file)
    config.enable_use_gpu(1024, 0)  # 1024 MB initial GPU memory pool, device 0
    print("Creating predictor...")
    predictor = create_predictor(config)
    # Query tensor names so we can bind input/output handles.
    input_names = predictor.get_input_names()
    output_names = predictor.get_output_names()
    print(f"Input names: {input_names}")
    print(f"Output names: {output_names}")
    # Load and preprocess image.
    # FIX: force 3-channel RGB — a grayscale ('L') page produced a 2-D array
    # and crashed transpose(2, 0, 1); an RGBA PNG produced a 4-channel tensor.
    img = Image.open(image_path).convert('RGB')
    img = img.resize((640, 640))
    arr = np.array(img).astype('float32')
    # NOTE(review): plain /255 scaling; the real OCR pipeline also applies
    # mean/std normalization. Good enough for a sanity probe of the kernel
    # path, but absolute output values will differ from production.
    arr = arr / 255.0
    arr = arr.transpose(2, 0, 1)[np.newaxis, ...]  # HWC -> NCHW
    print(f"Input tensor shape: {arr.shape}")
    # Copy the input to the predictor and run one forward pass.
    input_handle = predictor.get_input_handle(input_names[0])
    input_handle.reshape(arr.shape)
    input_handle.copy_from_cpu(arr)
    print("Running inference...")
    predictor.run()
    # Pull the raw probability map back to the host for analysis.
    output_handle = predictor.get_output_handle(output_names[0])
    output = output_handle.copy_to_cpu()
    print()
    print("OUTPUT ANALYSIS:")
    print(f"  Shape: {output.shape}")
    print(f"  Min: {output.min():.6f}")
    print(f"  Max: {output.max():.6f}")
    print(f"  Mean: {output.mean():.6f}")
    print(f"  Std: {output.std():.6f}")
    print(f"  Has NaN: {np.isnan(output).any()}")
    print(f"  Has Inf: {np.isinf(output).any()}")
    # Diagnosis: classify the stats into known failure modes.
    print()
    print("DIAGNOSIS:")
    if output.min() == output.max():
        print("  PROBLEM: Output is constant - model inference is broken!")
        print("  This typically indicates GPU compute capability mismatch.")
        print("  GB10 (sm_121) may need CUDA 13.0+ for native support.")
    elif output.max() < 0.01:
        print("  PROBLEM: Output values too low - detection will find nothing.")
    elif np.isnan(output).any() or np.isinf(output).any():
        print("  PROBLEM: Output contains NaN/Inf - numerical instability.")
    else:
        print("  OK: Output values look reasonable.")
        print(f"  Detection threshold typically 0.3-0.6, max output is {output.max():.3f}")
def test_paddleocr_output(image_path: str):
    """Run the full PaddleOCR pipeline and report detection/recognition counts.

    Args:
        image_path: Path to an image file. Caller is expected to have
            verified the file exists.
    """
    print()
    print("=" * 60)
    print("PADDLEOCR PIPELINE TEST")
    print("=" * 60)
    from paddleocr import PaddleOCR
    ocr = PaddleOCR(
        text_detection_model_name='PP-OCRv4_mobile_det',
        text_recognition_model_name='PP-OCRv4_mobile_rec',
    )
    # FIX: force RGB so grayscale or RGBA pages hand the pipeline a
    # (H, W, 3) array instead of a 2-D or 4-channel one.
    img = Image.open(image_path).convert('RGB')
    arr = np.array(img)
    out = ocr.predict(arr)
    # First (and only) page result, as a plain dict.
    res = out[0].json['res']
    dt_polys = res.get('dt_polys', [])
    rec_texts = res.get('rec_texts', [])
    print(f"Detection polygons: {len(dt_polys)}")
    print(f"Recognition texts: {len(rec_texts)}")
    if rec_texts:
        print(f"Sample texts: {rec_texts[:5]}")
    else:
        print("No text detected!")
def main():
    """Resolve the target image path, then run every diagnostic stage in order."""
    # First CLI argument overrides the default test image.
    if len(sys.argv) > 1:
        image_path = sys.argv[1]
    else:
        image_path = '/app/dataset/0/img/page_0001.png'
    if not os.path.exists(image_path):
        print(f"Image not found: {image_path}")
        print("Usage: python debug_gpu_detection.py [image_path]")
        sys.exit(1)
    print(f"Testing with image: {image_path}")
    print()
    check_gpu_status()
    test_basic_ops()
    test_detection_model(image_path)
    test_paddleocr_output(image_path)


if __name__ == '__main__':
    main()