PaddleOCR, EasyOCR and docTR GPU support. (#4)
All checks were successful
build_docker / essential (push) Successful in 0s
build_docker / build_cpu (push) Successful in 5m0s
build_docker / build_gpu (push) Successful in 22m55s
build_docker / build_easyocr (push) Successful in 18m47s
build_docker / build_easyocr_gpu (push) Successful in 19m0s
build_docker / build_raytune (push) Successful in 3m27s
build_docker / build_doctr (push) Successful in 19m42s
build_docker / build_doctr_gpu (push) Successful in 14m49s
This commit was merged in pull request #4.
src/paddle_ocr/scripts/debug_gpu_detection.py (new file, 199 lines)
@@ -0,0 +1,199 @@
#!/usr/bin/env python3
"""
Debug script for GPU OCR detection issues.

This script tests the raw inference output from PaddlePaddle detection models
to diagnose why detection might fail on certain GPU architectures
(e.g., Blackwell/sm_121).

Usage:
    docker exec paddle-ocr-gpu python /app/debug_gpu_detection.py [image_path]

Expected behavior:
- Working GPU: output stats show min close to 0, max close to 1, mean ~0.1-0.5
- Broken GPU: output stats show constant values (e.g., min=max=mean=0.00001)
"""

import os
import sys

# Must be set before the PaddleOCR/PaddleX imports so the model source
# check is skipped.
os.environ['DISABLE_MODEL_SOURCE_CHECK'] = 'True'

import numpy as np
import paddle
from PIL import Image


def check_gpu_status():
    """Check GPU availability and properties."""
    print("=" * 60)
    print("GPU STATUS")
    print("=" * 60)
    print(f"Device: {paddle.device.get_device()}")
    print(f"CUDA compiled: {paddle.device.is_compiled_with_cuda()}")

    if paddle.device.is_compiled_with_cuda():
        print(f"GPU count: {paddle.device.cuda.device_count()}")
        if paddle.device.cuda.device_count() > 0:
            props = paddle.device.cuda.get_device_properties(0)
            print(f"GPU name: {props.name}")
            print(f"Compute capability: {props.major}.{props.minor}")
            print(f"Total memory: {props.total_memory / (1024**3):.2f} GB")
    print()


def test_basic_ops():
    """Test basic GPU tensor operations."""
    print("=" * 60)
    print("BASIC GPU OPERATIONS")
    print("=" * 60)

    # Test tensor creation
    x = paddle.randn([2, 3])
    print(f"Tensor place: {x.place}")

    # Test conv2d
    x = paddle.randn([1, 3, 64, 64])
    conv = paddle.nn.Conv2D(3, 16, 3, padding=1)
    y = conv(x)
    print(f"Conv2d output shape: {y.shape}, place: {y.place}")

    # Test softmax
    s = paddle.nn.functional.softmax(y, axis=1)
    print(f"Softmax output shape: {s.shape}")
    print("Basic operations: OK")
    print()


def test_detection_model(image_path: str):
    """Test detection model raw output."""
    print("=" * 60)
    print("DETECTION MODEL TEST")
    print("=" * 60)

    from paddle.inference import Config, create_predictor

    model_dir = '/root/.paddlex/official_models/PP-OCRv4_mobile_det'
    inference_file = f'{model_dir}/inference.json'
    params_file = f'{model_dir}/inference.pdiparams'

    if not os.path.exists(inference_file):
        print(f"Model not found at {model_dir}")
        print("Run PaddleOCR once to download models first.")
        return

    # Create config: 1024 MB initial GPU memory pool on device 0
    config = Config()
    config.set_prog_file(inference_file)
    config.set_params_file(params_file)
    config.enable_use_gpu(1024, 0)

    print("Creating predictor...")
    predictor = create_predictor(config)

    # Get input/output names
    input_names = predictor.get_input_names()
    output_names = predictor.get_output_names()
    print(f"Input names: {input_names}")
    print(f"Output names: {output_names}")

    # Load and preprocess image. Convert to RGB so an RGBA or grayscale
    # image does not break the 3-channel assumption below. Plain [0, 1]
    # scaling is not the model's trained normalization, but it is enough
    # to spot constant or NaN outputs.
    img = Image.open(image_path).convert('RGB')
    img = img.resize((640, 640))
    arr = np.array(img).astype('float32')
    arr = arr / 255.0
    arr = arr.transpose(2, 0, 1)[np.newaxis, ...]  # HWC -> NCHW
    print(f"Input tensor shape: {arr.shape}")

    # Set input
    input_handle = predictor.get_input_handle(input_names[0])
    input_handle.reshape(arr.shape)
    input_handle.copy_from_cpu(arr)

    # Run prediction
    print("Running inference...")
    predictor.run()

    # Get output
    output_handle = predictor.get_output_handle(output_names[0])
    output = output_handle.copy_to_cpu()

    print()
    print("OUTPUT ANALYSIS:")
    print(f"  Shape: {output.shape}")
    print(f"  Min: {output.min():.6f}")
    print(f"  Max: {output.max():.6f}")
    print(f"  Mean: {output.mean():.6f}")
    print(f"  Std: {output.std():.6f}")
    print(f"  Has NaN: {np.isnan(output).any()}")
    print(f"  Has Inf: {np.isinf(output).any()}")

    # Diagnosis
    print()
    print("DIAGNOSIS:")
    if output.min() == output.max():
        print("  PROBLEM: Output is constant - model inference is broken!")
        print("  This typically indicates GPU compute capability mismatch.")
        print("  GB10 (sm_121) may need CUDA 13.0+ for native support.")
    elif output.max() < 0.01:
        print("  PROBLEM: Output values too low - detection will find nothing.")
    elif np.isnan(output).any() or np.isinf(output).any():
        print("  PROBLEM: Output contains NaN/Inf - numerical instability.")
    else:
        print("  OK: Output values look reasonable.")
        print(f"  Detection threshold is typically 0.3-0.6; max output is {output.max():.3f}")


def test_paddleocr_output(image_path: str):
    """Test full PaddleOCR pipeline."""
    print()
    print("=" * 60)
    print("PADDLEOCR PIPELINE TEST")
    print("=" * 60)

    from paddleocr import PaddleOCR

    ocr = PaddleOCR(
        text_detection_model_name='PP-OCRv4_mobile_det',
        text_recognition_model_name='PP-OCRv4_mobile_rec',
    )

    img = Image.open(image_path).convert('RGB')
    arr = np.array(img)

    out = ocr.predict(arr)
    res = out[0].json['res']

    dt_polys = res.get('dt_polys', [])
    rec_texts = res.get('rec_texts', [])

    print(f"Detection polygons: {len(dt_polys)}")
    print(f"Recognition texts: {len(rec_texts)}")

    if rec_texts:
        print(f"Sample texts: {rec_texts[:5]}")
    else:
        print("No text detected!")


def main():
    # Default test image
    image_path = '/app/dataset/0/img/page_0001.png'
    if len(sys.argv) > 1:
        image_path = sys.argv[1]

    if not os.path.exists(image_path):
        print(f"Image not found: {image_path}")
        print("Usage: python debug_gpu_detection.py [image_path]")
        sys.exit(1)

    print(f"Testing with image: {image_path}")
    print()

    check_gpu_status()
    test_basic_ops()
    test_detection_model(image_path)
    test_paddleocr_output(image_path)


if __name__ == '__main__':
    main()
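
If the script's diagnosis reports constant output, a natural follow-up is to cross-check a single convolution on CPU vs GPU with identical weights: constant or wildly divergent GPU results point at kernel-level breakage of the kind described above, rather than a preprocessing mistake. The snippet below is a minimal sketch, not part of the committed script; it assumes a paddle build with CUDA and reuses only APIs the script itself relies on, plus set_state_dict to share weights between the two layers.

import numpy as np
import paddle

x = np.random.randn(1, 3, 64, 64).astype('float32')

# Run the conv on CPU first to get a reference output and reference weights.
paddle.device.set_device('cpu')
conv_cpu = paddle.nn.Conv2D(3, 8, 3, padding=1)
y_cpu = conv_cpu(paddle.to_tensor(x)).numpy()

# Rebuild the same layer on the GPU and copy the CPU weights into it.
paddle.device.set_device('gpu:0')
conv_gpu = paddle.nn.Conv2D(3, 8, 3, padding=1)
conv_gpu.set_state_dict(conv_cpu.state_dict())
y_gpu = conv_gpu(paddle.to_tensor(x)).numpy()

print("max |cpu - gpu|:", float(np.abs(y_cpu - y_gpu).max()))
print("gpu output constant:", bool(y_gpu.min() == y_gpu.max()))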