measuring
Some checks failed
build_docker / build_gpu (linux/arm64) (push) Has been cancelled
build_docker / manifest_cpu (push) Has been cancelled
build_docker / manifest_gpu (push) Has been cancelled
build_docker / build_cpu (linux/arm64) (push) Has been cancelled
build_docker / build_cpu (linux/amd64) (push) Has been cancelled
build_docker / essential (push) Successful in 1s
build_docker / essential (pull_request) Successful in 1s
build_docker / build_gpu (linux/amd64) (push) Has been cancelled
build_docker / build_cpu (linux/amd64) (pull_request) Successful in 4m1s
build_docker / build_cpu (linux/arm64) (pull_request) Successful in 21m42s
build_docker / build_gpu (linux/amd64) (pull_request) Successful in 19m54s
build_docker / manifest_cpu (pull_request) Successful in 22s
build_docker / build_gpu (linux/arm64) (pull_request) Successful in 17m47s
build_docker / manifest_gpu (pull_request) Successful in 22s
.gitignore (vendored): 1 addition
@@ -6,3 +6,4 @@ results
 .DS_Store
 .claude
 node_modules
+src/paddle_ocr/wheels
src/paddle_ocr/benchmark.py (new file): 207 additions
@@ -0,0 +1,207 @@
# benchmark.py - Compare CPU vs GPU performance for PaddleOCR REST API
# Usage: python benchmark.py

import requests
import time
import json
import sys
from datetime import datetime

CONTAINERS = {
    "GPU": {"url": "http://localhost:8000", "port": 8000},
    "CPU": {"url": "http://localhost:8002", "port": 8002},
}

DATASET_PATH = "/app/dataset"

# Test configurations
TEST_CONFIGS = [
    {
        "name": "Baseline",
        "config": {
            "pdf_folder": DATASET_PATH,
            "use_doc_orientation_classify": False,
            "use_doc_unwarping": False,
            "textline_orientation": False,
            "text_det_thresh": 0.0,
            "text_det_box_thresh": 0.0,
            "text_det_unclip_ratio": 1.5,
            "text_rec_score_thresh": 0.0,
            "start_page": 5,
            "end_page": 10,
        }
    },
    {
        "name": "Optimized",
        "config": {
            "pdf_folder": DATASET_PATH,
            "use_doc_orientation_classify": False,
            "use_doc_unwarping": False,
            "textline_orientation": True,
            "text_det_thresh": 0.4690,
            "text_det_box_thresh": 0.5412,
            "text_det_unclip_ratio": 0.0,
            "text_rec_score_thresh": 0.6350,
            "start_page": 5,
            "end_page": 10,
        }
    },
]


def check_health(url: str, timeout: int = 10) -> bool:
    """Check if API is healthy."""
    try:
        resp = requests.get(f"{url}/health", timeout=timeout)
        if resp.status_code == 200:
            data = resp.json()
            return data.get("model_loaded", False)
    except Exception as e:
        print(f"  Health check failed: {e}")
    return False


def run_benchmark(url: str, config: dict, warmup: bool = False) -> dict:
    """Run a single benchmark test."""
    eval_url = f"{url}/evaluate"

    start = time.time()
    resp = requests.post(eval_url, json=config, timeout=600)
    resp.raise_for_status()
    total_time = time.time() - start

    result = resp.json()
    result["total_request_time"] = total_time

    return result


def main():
    results = {
        "timestamp": datetime.now().isoformat(),
        "containers": {},
    }

    print("=" * 60)
    print("PaddleOCR CPU vs GPU Benchmark")
    print("=" * 60)
    print()

    # Check container health
    print("Checking container health...")
    for name, info in CONTAINERS.items():
        healthy = check_health(info["url"])
        status = "✓ Ready" if healthy else "✗ Not Ready"
        print(f"  {name} ({info['url']}): {status}")
        if not healthy:
            print(f"  Skipping {name} - container not available")
            continue
    print()

    # Run benchmarks for each container
    for container_name, container_info in CONTAINERS.items():
        url = container_info["url"]

        if not check_health(url):
            print(f"Skipping {container_name} - not healthy")
            continue

        print("=" * 60)
        print(f"Testing: {container_name} Container")
        print(f"URL: {url}")
        print("=" * 60)

        container_results = {
            "url": url,
            "tests": {},
        }

        # Warmup run (first run often slower due to model loading/caching)
        print("\n  Warmup run...")
        try:
            warmup_config = TEST_CONFIGS[0]["config"].copy()
            warmup_config["start_page"] = 5
            warmup_config["end_page"] = 6  # Just 1 page for warmup
            run_benchmark(url, warmup_config, warmup=True)
            print("  Warmup complete.")
        except Exception as e:
            print(f"  Warmup failed: {e}")

        # Run each test configuration
        for test in TEST_CONFIGS:
            test_name = test["name"]
            config = test["config"]

            print(f"\n  Running: {test_name} Configuration")
            print(f"  Pages: {config['start_page']} to {config['end_page']}")

            try:
                result = run_benchmark(url, config)

                container_results["tests"][test_name] = {
                    "CER": result["CER"],
                    "WER": result["WER"],
                    "PAGES": result["PAGES"],
                    "TIME_PER_PAGE": result["TIME_PER_PAGE"],
                    "TOTAL_TIME": result["total_request_time"],
                }

                print(f"    CER: {result['CER']*100:.2f}%")
                print(f"    WER: {result['WER']*100:.2f}%")
                print(f"    Pages: {result['PAGES']}")
                print(f"    Time/page: {result['TIME_PER_PAGE']:.3f}s")
                print(f"    Total time: {result['total_request_time']:.2f}s")

            except Exception as e:
                print(f"    ERROR: {e}")
                container_results["tests"][test_name] = {"error": str(e)}

        results["containers"][container_name] = container_results

    # Print summary
    print("\n")
    print("=" * 60)
    print("BENCHMARK SUMMARY")
    print("=" * 60)

    # Table header
    print(f"\n{'Test':<12} {'Container':<8} {'CER %':<10} {'WER %':<10} {'Time/Page':<12} {'Total (s)':<10}")
    print("-" * 62)

    for test in TEST_CONFIGS:
        test_name = test["name"]
        for container_name in CONTAINERS.keys():
            if container_name in results["containers"]:
                tests = results["containers"][container_name].get("tests", {})
                if test_name in tests and "error" not in tests[test_name]:
                    t = tests[test_name]
                    print(f"{test_name:<12} {container_name:<8} {t['CER']*100:<10.2f} {t['WER']*100:<10.2f} {t['TIME_PER_PAGE']:<12.3f} {t['TOTAL_TIME']:<10.2f}")

    # Speed comparison
    print("\n" + "=" * 60)
    print("SPEED COMPARISON")
    print("=" * 60)

    for test in TEST_CONFIGS:
        test_name = test["name"]
        gpu_data = results["containers"].get("GPU", {}).get("tests", {}).get(test_name, {})
        cpu_data = results["containers"].get("CPU", {}).get("tests", {}).get(test_name, {})

        if gpu_data and cpu_data and "error" not in gpu_data and "error" not in cpu_data:
            speedup = cpu_data["TIME_PER_PAGE"] / gpu_data["TIME_PER_PAGE"]
            print(f"\n{test_name} Configuration:")
            print(f"  GPU: {gpu_data['TIME_PER_PAGE']:.3f}s per page")
            print(f"  CPU: {cpu_data['TIME_PER_PAGE']:.3f}s per page")
            print(f"  GPU is {speedup:.2f}x faster than CPU")

    # Save results to JSON
    output_file = "benchmark_results.json"
    with open(output_file, "w") as f:
        json.dump(results, f, indent=2)
    print(f"\n\nResults saved to: {output_file}")

    return results


if __name__ == "__main__":
    main()