doceker support
This commit is contained in:
114
src/paddle_ocr/test.py
Normal file
114
src/paddle_ocr/test.py
Normal file
@@ -0,0 +1,114 @@
|
||||
# test.py - Simple client to test PaddleOCR REST API
|
||||
# Usage: python test.py [--url URL] [--dataset PATH]
|
||||
|
||||
import argparse
|
||||
import requests
|
||||
import time
|
||||
import sys
|
||||
|
||||
|
||||
def wait_for_health(url: str, timeout: int = 120) -> bool:
|
||||
"""Wait for API to be ready."""
|
||||
health_url = f"{url}/health"
|
||||
start = time.time()
|
||||
|
||||
print(f"Waiting for API at {health_url}...")
|
||||
while time.time() - start < timeout:
|
||||
try:
|
||||
resp = requests.get(health_url, timeout=5)
|
||||
if resp.status_code == 200:
|
||||
data = resp.json()
|
||||
if data.get("model_loaded"):
|
||||
print(f"API ready! Model loaded in {time.time() - start:.1f}s")
|
||||
return True
|
||||
print(f" Model loading... ({time.time() - start:.0f}s)")
|
||||
except requests.exceptions.ConnectionError:
|
||||
print(f" Connecting... ({time.time() - start:.0f}s)")
|
||||
except Exception as e:
|
||||
print(f" Error: {e}")
|
||||
time.sleep(2)
|
||||
|
||||
print("Timeout waiting for API")
|
||||
return False
|
||||
|
||||
|
||||
def test_evaluate(url: str, config: dict) -> dict:
|
||||
"""Run evaluation with given config."""
|
||||
eval_url = f"{url}/evaluate"
|
||||
|
||||
print(f"\nTesting config: {config}")
|
||||
start = time.time()
|
||||
|
||||
resp = requests.post(eval_url, json=config, timeout=600)
|
||||
resp.raise_for_status()
|
||||
|
||||
result = resp.json()
|
||||
elapsed = time.time() - start
|
||||
|
||||
print(f"Results (took {elapsed:.1f}s):")
|
||||
print(f" CER: {result['CER']:.4f} ({result['CER']*100:.2f}%)")
|
||||
print(f" WER: {result['WER']:.4f} ({result['WER']*100:.2f}%)")
|
||||
print(f" Pages: {result['PAGES']}")
|
||||
print(f" Time/page: {result['TIME_PER_PAGE']:.2f}s")
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Test PaddleOCR REST API")
|
||||
parser.add_argument("--url", default="http://localhost:8000", help="API base URL")
|
||||
parser.add_argument("--dataset", default="/app/dataset", help="Dataset path (inside container)")
|
||||
parser.add_argument("--skip-health", action="store_true", help="Skip health check wait")
|
||||
args = parser.parse_args()
|
||||
|
||||
# Wait for API to be ready
|
||||
if not args.skip_health:
|
||||
if not wait_for_health(args.url):
|
||||
sys.exit(1)
|
||||
|
||||
# Test 1: Baseline config (default PaddleOCR)
|
||||
print("\n" + "="*50)
|
||||
print("TEST 1: Baseline Configuration")
|
||||
print("="*50)
|
||||
baseline = test_evaluate(args.url, {
|
||||
"pdf_folder": args.dataset,
|
||||
"use_doc_orientation_classify": False,
|
||||
"use_doc_unwarping": False,
|
||||
"textline_orientation": False, # Baseline: disabled
|
||||
"text_det_thresh": 0.0,
|
||||
"text_det_box_thresh": 0.0,
|
||||
"text_det_unclip_ratio": 1.5,
|
||||
"text_rec_score_thresh": 0.0,
|
||||
"start_page": 5,
|
||||
"end_page": 10,
|
||||
})
|
||||
|
||||
# Test 2: Optimized config (from Ray Tune results)
|
||||
print("\n" + "="*50)
|
||||
print("TEST 2: Optimized Configuration")
|
||||
print("="*50)
|
||||
optimized = test_evaluate(args.url, {
|
||||
"pdf_folder": args.dataset,
|
||||
"use_doc_orientation_classify": False,
|
||||
"use_doc_unwarping": False,
|
||||
"textline_orientation": True, # KEY: enabled
|
||||
"text_det_thresh": 0.4690,
|
||||
"text_det_box_thresh": 0.5412,
|
||||
"text_det_unclip_ratio": 0.0,
|
||||
"text_rec_score_thresh": 0.6350,
|
||||
"start_page": 5,
|
||||
"end_page": 10,
|
||||
})
|
||||
|
||||
# Summary
|
||||
print("\n" + "="*50)
|
||||
print("SUMMARY")
|
||||
print("="*50)
|
||||
cer_reduction = (1 - optimized["CER"] / baseline["CER"]) * 100 if baseline["CER"] > 0 else 0
|
||||
print(f"Baseline CER: {baseline['CER']*100:.2f}%")
|
||||
print(f"Optimized CER: {optimized['CER']*100:.2f}%")
|
||||
print(f"Improvement: {cer_reduction:.1f}% reduction in errors")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user