200 lines
6.0 KiB
Python
200 lines
6.0 KiB
Python
|
|
#!/usr/bin/env python3
|
||
|
|
"""
|
||
|
|
Debug script for GPU OCR detection issues.
|
||
|
|
|
||
|
|
This script tests the raw inference output from PaddlePaddle detection models
|
||
|
|
to diagnose why detection might fail on certain GPU architectures (e.g., Blackwell/sm_121).
|
||
|
|
|
||
|
|
Usage:
|
||
|
|
docker exec paddle-ocr-gpu python /app/debug_gpu_detection.py [image_path]
|
||
|
|
|
||
|
|
Expected behavior:
|
||
|
|
- Working GPU: Output stats should show min close to 0, max close to 1, mean ~0.1-0.5
|
||
|
|
- Broken GPU: Output stats show constant values (e.g., min=max=mean=0.00001)
|
||
|
|
"""
|
||
|
|
|
||
|
|
import os
|
||
|
|
import sys
|
||
|
|
|
||
|
|
os.environ['DISABLE_MODEL_SOURCE_CHECK'] = 'True'
|
||
|
|
|
||
|
|
import numpy as np
|
||
|
|
import paddle
|
||
|
|
from PIL import Image
|
||
|
|
|
||
|
|
|
||
|
|
def check_gpu_status():
    """Report GPU visibility: device string, CUDA build flag, and card properties."""
    banner = "=" * 60
    print(banner)
    print("GPU STATUS")
    print(banner)
    print(f"Device: {paddle.device.get_device()}")
    cuda_built = paddle.device.is_compiled_with_cuda()
    print(f"CUDA compiled: {cuda_built}")

    if cuda_built:
        gpu_count = paddle.device.cuda.device_count()
        print(f"GPU count: {gpu_count}")
        if gpu_count > 0:
            # Properties of the first visible GPU; compute capability is the
            # key field when diagnosing unsupported-architecture problems.
            props = paddle.device.cuda.get_device_properties(0)
            print(f"GPU name: {props.name}")
            print(f"Compute capability: {props.major}.{props.minor}")
            print(f"Total memory: {props.total_memory / (1024**3):.2f} GB")
    print()
||
|
|
def test_basic_ops():
    """Sanity-check that elementary GPU tensor kernels execute at all."""
    banner = "=" * 60
    print(banner)
    print("BASIC GPU OPERATIONS")
    print(banner)

    # Tensor creation: confirms the default place (CPU vs GPU).
    sample = paddle.randn([2, 3])
    print(f"Tensor place: {sample.place}")

    # A small convolution exercises the conv kernel path.
    feature = paddle.randn([1, 3, 64, 64])
    layer = paddle.nn.Conv2D(3, 16, 3, padding=1)
    conv_out = layer(feature)
    print(f"Conv2d output shape: {conv_out.shape}, place: {conv_out.place}")

    # Softmax exercises elementwise + reduction kernels.
    probs = paddle.nn.functional.softmax(conv_out, axis=1)
    print(f"Softmax output shape: {probs.shape}")
    print("Basic operations: OK")
    print()
|
||
|
|
def test_detection_model(image_path: str):
    """Run the raw PP-OCRv4 detection model and analyze its output statistics.

    Bypasses the PaddleOCR pipeline and drives paddle.inference directly so
    the raw probability-map values can be inspected. On a working GPU the
    output should span roughly [0, 1]; a constant or NaN/Inf output points
    at a broken kernel / compute-capability mismatch.

    Args:
        image_path: Path to the image file to feed through the model.
    """
    print("=" * 60)
    print("DETECTION MODEL TEST")
    print("=" * 60)

    from paddle.inference import Config, create_predictor

    model_dir = '/root/.paddlex/official_models/PP-OCRv4_mobile_det'
    inference_file = f'{model_dir}/inference.json'
    params_file = f'{model_dir}/inference.pdiparams'

    if not os.path.exists(inference_file):
        print(f"Model not found at {model_dir}")
        print("Run PaddleOCR once to download models first.")
        return

    # Inference config pinned to GPU 0 with a 1024 MB initial workspace.
    config = Config()
    config.set_prog_file(inference_file)
    config.set_params_file(params_file)
    config.enable_use_gpu(1024, 0)

    print("Creating predictor...")
    predictor = create_predictor(config)

    # Introspect the model's I/O tensor names.
    input_names = predictor.get_input_names()
    output_names = predictor.get_output_names()
    print(f"Input names: {input_names}")
    print(f"Output names: {output_names}")

    # Load and preprocess the image.
    # FIX: force RGB so grayscale (2-D array) or RGBA (4-channel) inputs
    # still produce the HWC/3-channel layout that transpose(2, 0, 1) and
    # the model expect — without this the script crashes on non-RGB images.
    img = Image.open(image_path).convert('RGB')
    img = img.resize((640, 640))
    arr = np.array(img).astype('float32')
    arr = arr / 255.0
    arr = arr.transpose(2, 0, 1)[np.newaxis, ...]  # HWC -> NCHW
    # transpose() returns a non-contiguous view; copy_from_cpu wants a
    # contiguous buffer, so materialize one explicitly.
    arr = np.ascontiguousarray(arr)
    print(f"Input tensor shape: {arr.shape}")

    # Set input
    input_handle = predictor.get_input_handle(input_names[0])
    input_handle.reshape(arr.shape)
    input_handle.copy_from_cpu(arr)

    # Run prediction
    print("Running inference...")
    predictor.run()

    # Pull the probability map back to the host for analysis.
    output_handle = predictor.get_output_handle(output_names[0])
    output = output_handle.copy_to_cpu()

    print()
    print("OUTPUT ANALYSIS:")
    print(f" Shape: {output.shape}")
    print(f" Min: {output.min():.6f}")
    print(f" Max: {output.max():.6f}")
    print(f" Mean: {output.mean():.6f}")
    print(f" Std: {output.std():.6f}")
    print(f" Has NaN: {np.isnan(output).any()}")
    print(f" Has Inf: {np.isinf(output).any()}")

    # Diagnosis: constant output -> broken kernels; all-low output -> no
    # detections above threshold; NaN/Inf -> numerical instability.
    # (An all-NaN map falls through the first two comparisons, since
    # nan == nan and nan < 0.01 are both False, and is caught third.)
    print()
    print("DIAGNOSIS:")
    if output.min() == output.max():
        print(" PROBLEM: Output is constant - model inference is broken!")
        print(" This typically indicates GPU compute capability mismatch.")
        print(" GB10 (sm_121) may need CUDA 13.0+ for native support.")
    elif output.max() < 0.01:
        print(" PROBLEM: Output values too low - detection will find nothing.")
    elif np.isnan(output).any() or np.isinf(output).any():
        print(" PROBLEM: Output contains NaN/Inf - numerical instability.")
    else:
        print(" OK: Output values look reasonable.")
        print(f" Detection threshold typically 0.3-0.6, max output is {output.max():.3f}")
||
|
|
def test_paddleocr_output(image_path: str):
    """Run the full PaddleOCR pipeline and summarize detection/recognition counts.

    Complements test_detection_model(): if the raw model output looks sane
    but this stage finds nothing, the problem lies in post-processing rather
    than GPU inference.

    Args:
        image_path: Path to the image file to OCR.
    """
    print()
    print("=" * 60)
    print("PADDLEOCR PIPELINE TEST")
    print("=" * 60)

    from paddleocr import PaddleOCR

    ocr = PaddleOCR(
        text_detection_model_name='PP-OCRv4_mobile_det',
        text_recognition_model_name='PP-OCRv4_mobile_rec',
    )

    # FIX: force RGB before converting to an array — palette, grayscale or
    # RGBA files would otherwise yield a 2-D or 4-channel array instead of
    # the HWC/3-channel image the OCR pipeline expects.
    img = Image.open(image_path).convert('RGB')
    arr = np.array(img)

    out = ocr.predict(arr)
    res = out[0].json['res']

    # Use .get() defensively: key set depends on pipeline configuration.
    dt_polys = res.get('dt_polys', [])
    rec_texts = res.get('rec_texts', [])

    print(f"Detection polygons: {len(dt_polys)}")
    print(f"Recognition texts: {len(rec_texts)}")

    if rec_texts:
        print(f"Sample texts: {rec_texts[:5]}")
    else:
        print("No text detected!")
|
||
|
|
def main():
    """Entry point: resolve the target image and run every diagnostic stage."""
    # Use the CLI argument when present, else fall back to a bundled sample.
    image_path = sys.argv[1] if len(sys.argv) > 1 else '/app/dataset/0/img/page_0001.png'

    if not os.path.exists(image_path):
        print(f"Image not found: {image_path}")
        print("Usage: python debug_gpu_detection.py [image_path]")
        sys.exit(1)

    print(f"Testing with image: {image_path}")
    print()

    # Stages go from cheapest to most end-to-end, so the first failure
    # localizes the problem.
    check_gpu_status()
    test_basic_ops()
    test_detection_model(image_path)
    test_paddleocr_output(image_path)


if __name__ == '__main__':
    main()