#!/usr/bin/env python3
"""
Debug script for GPU OCR detection issues.

This script tests the raw inference output from PaddlePaddle detection models
to diagnose why detection might fail on certain GPU architectures
(e.g., Blackwell/sm_121).

Usage:
    docker exec paddle-ocr-gpu python /app/debug_gpu_detection.py [image_path]

Expected behavior:
- Working GPU: Output stats should show min close to 0, max close to 1,
  mean ~0.1-0.5
- Broken GPU: Output stats show constant values (e.g., min=max=mean=0.00001)
"""
import os
import sys

# Must be set before paddle/paddleocr are imported, hence placed mid-imports.
os.environ['DISABLE_MODEL_SOURCE_CHECK'] = 'True'

import numpy as np
import paddle
from PIL import Image


def check_gpu_status():
    """Print GPU availability, compute capability, and total memory."""
    print("=" * 60)
    print("GPU STATUS")
    print("=" * 60)
    print(f"Device: {paddle.device.get_device()}")
    print(f"CUDA compiled: {paddle.device.is_compiled_with_cuda()}")
    if paddle.device.is_compiled_with_cuda():
        print(f"GPU count: {paddle.device.cuda.device_count()}")
        if paddle.device.cuda.device_count() > 0:
            props = paddle.device.cuda.get_device_properties(0)
            print(f"GPU name: {props.name}")
            print(f"Compute capability: {props.major}.{props.minor}")
            print(f"Total memory: {props.total_memory / (1024**3):.2f} GB")
    print()


def test_basic_ops():
    """Sanity-check basic GPU tensor ops: creation, conv2d, softmax."""
    print("=" * 60)
    print("BASIC GPU OPERATIONS")
    print("=" * 60)

    # Test tensor creation
    x = paddle.randn([2, 3])
    print(f"Tensor place: {x.place}")

    # Test conv2d
    x = paddle.randn([1, 3, 64, 64])
    conv = paddle.nn.Conv2D(3, 16, 3, padding=1)
    y = conv(x)
    print(f"Conv2d output shape: {y.shape}, place: {y.place}")

    # Test softmax
    s = paddle.nn.functional.softmax(y, axis=1)
    print(f"Softmax output shape: {s.shape}")
    print("Basic operations: OK")
    print()


def test_detection_model(image_path: str):
    """Run the raw detection model and analyze its output statistics.

    A healthy detection model emits a probability map spanning roughly
    [0, 1]; a constant output is the signature of broken GPU inference
    (e.g., unsupported compute capability).

    Args:
        image_path: Path to the test image fed to the model.
    """
    print("=" * 60)
    print("DETECTION MODEL TEST")
    print("=" * 60)
    from paddle.inference import Config, create_predictor

    model_dir = '/root/.paddlex/official_models/PP-OCRv4_mobile_det'
    inference_file = f'{model_dir}/inference.json'
    params_file = f'{model_dir}/inference.pdiparams'

    # FIX: verify both model files exist (previously only the program
    # file was checked, so a partial download crashed later instead of
    # printing the hint below).
    if not (os.path.exists(inference_file) and os.path.exists(params_file)):
        print(f"Model not found at {model_dir}")
        print("Run PaddleOCR once to download models first.")
        return

    # Create config
    config = Config()
    config.set_prog_file(inference_file)
    config.set_params_file(params_file)
    config.enable_use_gpu(1024, 0)  # 1024 MB initial pool on device 0

    print("Creating predictor...")
    predictor = create_predictor(config)

    # Get input/output names
    input_names = predictor.get_input_names()
    output_names = predictor.get_output_names()
    print(f"Input names: {input_names}")
    print(f"Output names: {output_names}")

    # Load and preprocess image.
    # FIX: force 3-channel RGB — PNG inputs are often RGBA or grayscale,
    # which would break the NCHW transpose below (4 channels or a 2-D
    # array with no channel axis).
    img = Image.open(image_path).convert('RGB')
    img = img.resize((640, 640))
    arr = np.array(img).astype('float32')
    arr = arr / 255.0
    arr = arr.transpose(2, 0, 1)[np.newaxis, ...]  # NCHW
    print(f"Input tensor shape: {arr.shape}")

    # Set input
    input_handle = predictor.get_input_handle(input_names[0])
    input_handle.reshape(arr.shape)
    input_handle.copy_from_cpu(arr)

    # Run prediction
    print("Running inference...")
    predictor.run()

    # Get output
    output_handle = predictor.get_output_handle(output_names[0])
    output = output_handle.copy_to_cpu()

    print()
    print("OUTPUT ANALYSIS:")
    print(f"  Shape: {output.shape}")
    print(f"  Min: {output.min():.6f}")
    print(f"  Max: {output.max():.6f}")
    print(f"  Mean: {output.mean():.6f}")
    print(f"  Std: {output.std():.6f}")
    print(f"  Has NaN: {np.isnan(output).any()}")
    print(f"  Has Inf: {np.isinf(output).any()}")

    # Diagnosis. NOTE: branch order is safe with NaNs because NaN
    # comparisons are False, so NaN output falls through to the isnan
    # branch rather than matching min == max.
    print()
    print("DIAGNOSIS:")
    if output.min() == output.max():
        print("  PROBLEM: Output is constant - model inference is broken!")
        print("  This typically indicates GPU compute capability mismatch.")
        print("  GB10 (sm_121) may need CUDA 13.0+ for native support.")
    elif output.max() < 0.01:
        print("  PROBLEM: Output values too low - detection will find nothing.")
    elif np.isnan(output).any() or np.isinf(output).any():
        print("  PROBLEM: Output contains NaN/Inf - numerical instability.")
    else:
        print("  OK: Output values look reasonable.")
        print(f"  Detection threshold typically 0.3-0.6, max output is {output.max():.3f}")


def test_paddleocr_output(image_path: str):
    """Run the full PaddleOCR pipeline and report detection/recognition counts.

    Args:
        image_path: Path to the test image fed to the pipeline.
    """
    print()
    print("=" * 60)
    print("PADDLEOCR PIPELINE TEST")
    print("=" * 60)
    from paddleocr import PaddleOCR

    ocr = PaddleOCR(
        text_detection_model_name='PP-OCRv4_mobile_det',
        text_recognition_model_name='PP-OCRv4_mobile_rec',
    )

    # FIX: force RGB so an RGBA/grayscale PNG yields a 3-channel array,
    # consistent with the raw-model test above.
    img = Image.open(image_path).convert('RGB')
    arr = np.array(img)
    out = ocr.predict(arr)
    res = out[0].json['res']
    dt_polys = res.get('dt_polys', [])
    rec_texts = res.get('rec_texts', [])

    print(f"Detection polygons: {len(dt_polys)}")
    print(f"Recognition texts: {len(rec_texts)}")
    if rec_texts:
        print(f"Sample texts: {rec_texts[:5]}")
    else:
        print("No text detected!")


def main():
    """Resolve the image path from argv (or the default) and run all tests."""
    # Default test image
    image_path = '/app/dataset/0/img/page_0001.png'
    if len(sys.argv) > 1:
        image_path = sys.argv[1]

    if not os.path.exists(image_path):
        print(f"Image not found: {image_path}")
        print("Usage: python debug_gpu_detection.py [image_path]")
        sys.exit(1)

    print(f"Testing with image: {image_path}")
    print()

    check_gpu_status()
    test_basic_ops()
    test_detection_model(image_path)
    test_paddleocr_output(image_path)


if __name__ == '__main__':
    main()