diff --git a/.gitignore b/.gitignore
index 686d80f..f9ab5c6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@ results
 .DS_Store
 .claude
 node_modules
+src/paddle_ocr/wheels
diff --git a/src/paddle_ocr/benchmark.py b/src/paddle_ocr/benchmark.py
new file mode 100644
index 0000000..bf6cc9e
--- /dev/null
+++ b/src/paddle_ocr/benchmark.py
@@ -0,0 +1,207 @@
+# benchmark.py - Compare CPU vs GPU performance for PaddleOCR REST API
+# Usage: python benchmark.py
+
+import requests
+import time
+import json
+import sys
+from datetime import datetime
+
+CONTAINERS = {
+    "GPU": {"url": "http://localhost:8000", "port": 8000},
+    "CPU": {"url": "http://localhost:8002", "port": 8002},
+}
+
+DATASET_PATH = "/app/dataset"
+
+# Test configurations
+TEST_CONFIGS = [
+    {
+        "name": "Baseline",
+        "config": {
+            "pdf_folder": DATASET_PATH,
+            "use_doc_orientation_classify": False,
+            "use_doc_unwarping": False,
+            "textline_orientation": False,
+            "text_det_thresh": 0.0,
+            "text_det_box_thresh": 0.0,
+            "text_det_unclip_ratio": 1.5,
+            "text_rec_score_thresh": 0.0,
+            "start_page": 5,
+            "end_page": 10,
+        }
+    },
+    {
+        "name": "Optimized",
+        "config": {
+            "pdf_folder": DATASET_PATH,
+            "use_doc_orientation_classify": False,
+            "use_doc_unwarping": False,
+            "textline_orientation": True,
+            "text_det_thresh": 0.4690,
+            "text_det_box_thresh": 0.5412,
+            "text_det_unclip_ratio": 0.0,
+            "text_rec_score_thresh": 0.6350,
+            "start_page": 5,
+            "end_page": 10,
+        }
+    },
+]
+
+
+def check_health(url: str, timeout: int = 10) -> bool:
+    """Check if API is healthy."""
+    try:
+        resp = requests.get(f"{url}/health", timeout=timeout)
+        if resp.status_code == 200:
+            data = resp.json()
+            return data.get("model_loaded", False)
+    except Exception as e:
+        print(f"  Health check failed: {e}")
+    return False
+
+
+def run_benchmark(url: str, config: dict, warmup: bool = False) -> dict:
+    """Run a single benchmark test."""
+    eval_url = f"{url}/evaluate"
+
+    start = time.time()
+    resp = requests.post(eval_url, json=config, timeout=600)
+    resp.raise_for_status()
+    total_time = time.time() - start
+
+    result = resp.json()
+    result["total_request_time"] = total_time
+
+    return result
+
+
+def main():
+    results = {
+        "timestamp": datetime.now().isoformat(),
+        "containers": {},
+    }
+
+    print("=" * 60)
+    print("PaddleOCR CPU vs GPU Benchmark")
+    print("=" * 60)
+    print()
+
+    # Check container health
+    print("Checking container health...")
+    for name, info in CONTAINERS.items():
+        healthy = check_health(info["url"])
+        status = "✓ Ready" if healthy else "✗ Not Ready"
+        print(f"  {name} ({info['url']}): {status}")
+        if not healthy:
+            print(f"  Skipping {name} - container not available")
+            continue
+    print()
+
+    # Run benchmarks for each container
+    for container_name, container_info in CONTAINERS.items():
+        url = container_info["url"]
+
+        if not check_health(url):
+            print(f"Skipping {container_name} - not healthy")
+            continue
+
+        print("=" * 60)
+        print(f"Testing: {container_name} Container")
+        print(f"URL: {url}")
+        print("=" * 60)
+
+        container_results = {
+            "url": url,
+            "tests": {},
+        }
+
+        # Warmup run (first run often slower due to model loading/caching)
+        print("\n  Warmup run...")
+        try:
+            warmup_config = TEST_CONFIGS[0]["config"].copy()
+            warmup_config["start_page"] = 5
+            warmup_config["end_page"] = 6  # Just 1 page for warmup
+            run_benchmark(url, warmup_config, warmup=True)
+            print("  Warmup complete.")
+        except Exception as e:
+            print(f"  Warmup failed: {e}")
+
+        # Run each test configuration
+        for test in TEST_CONFIGS:
+            test_name = test["name"]
+            config = test["config"]
+
+            print(f"\n  Running: {test_name} Configuration")
+            print(f"  Pages: {config['start_page']} to {config['end_page']}")
+
+            try:
+                result = run_benchmark(url, config)
+
+                container_results["tests"][test_name] = {
+                    "CER": result["CER"],
+                    "WER": result["WER"],
+                    "PAGES": result["PAGES"],
+                    "TIME_PER_PAGE": result["TIME_PER_PAGE"],
+                    "TOTAL_TIME": result["total_request_time"],
+                }
+
+                print(f"  CER: {result['CER']*100:.2f}%")
+                print(f"  WER: {result['WER']*100:.2f}%")
+                print(f"  Pages: {result['PAGES']}")
+                print(f"  Time/page: {result['TIME_PER_PAGE']:.3f}s")
+                print(f"  Total time: {result['total_request_time']:.2f}s")
+
+            except Exception as e:
+                print(f"  ERROR: {e}")
+                container_results["tests"][test_name] = {"error": str(e)}
+
+        results["containers"][container_name] = container_results
+
+    # Print summary
+    print("\n")
+    print("=" * 60)
+    print("BENCHMARK SUMMARY")
+    print("=" * 60)
+
+    # Table header
+    print(f"\n{'Test':<12} {'Container':<8} {'CER %':<10} {'WER %':<10} {'Time/Page':<12} {'Total (s)':<10}")
+    print("-" * 62)
+
+    for test in TEST_CONFIGS:
+        test_name = test["name"]
+        for container_name in CONTAINERS.keys():
+            if container_name in results["containers"]:
+                tests = results["containers"][container_name].get("tests", {})
+                if test_name in tests and "error" not in tests[test_name]:
+                    t = tests[test_name]
+                    print(f"{test_name:<12} {container_name:<8} {t['CER']*100:<10.2f} {t['WER']*100:<10.2f} {t['TIME_PER_PAGE']:<12.3f} {t['TOTAL_TIME']:<10.2f}")
+
+    # Speed comparison
+    print("\n" + "=" * 60)
+    print("SPEED COMPARISON")
+    print("=" * 60)
+
+    for test in TEST_CONFIGS:
+        test_name = test["name"]
+        gpu_data = results["containers"].get("GPU", {}).get("tests", {}).get(test_name, {})
+        cpu_data = results["containers"].get("CPU", {}).get("tests", {}).get(test_name, {})
+
+        if gpu_data and cpu_data and "error" not in gpu_data and "error" not in cpu_data:
+            speedup = cpu_data["TIME_PER_PAGE"] / gpu_data["TIME_PER_PAGE"]
+            print(f"\n{test_name} Configuration:")
+            print(f"  GPU: {gpu_data['TIME_PER_PAGE']:.3f}s per page")
+            print(f"  CPU: {cpu_data['TIME_PER_PAGE']:.3f}s per page")
+            print(f"  GPU is {speedup:.2f}x faster than CPU")
+
+    # Save results to JSON
+    output_file = "benchmark_results.json"
+    with open(output_file, "w") as f:
+        json.dump(results, f, indent=2)
+    print(f"\n\nResults saved to: {output_file}")
+
+    return results
+
+
+if __name__ == "__main__":
+    main()
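For context, a minimal sketch of how the benchmark_results.json file written by the script above could be read back to recompute the CPU-to-GPU speedups offline. The key layout (containers -> tests -> TIME_PER_PAGE) follows benchmark.py; the companion script itself is hypothetical and not part of this diff.

# compare_results.py - hypothetical helper, not included in the diff above.
# Loads the JSON saved by benchmark.py and reprints the speedup per configuration.
import json

with open("benchmark_results.json") as f:
    results = json.load(f)

for test_name in ("Baseline", "Optimized"):
    gpu = results["containers"].get("GPU", {}).get("tests", {}).get(test_name, {})
    cpu = results["containers"].get("CPU", {}).get("tests", {}).get(test_name, {})
    if gpu and cpu and "error" not in gpu and "error" not in cpu:
        # Speedup = CPU seconds per page divided by GPU seconds per page
        print(f"{test_name}: {cpu['TIME_PER_PAGE'] / gpu['TIME_PER_PAGE']:.2f}x GPU speedup")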