2026-01-18 08:19:34 +01:00
|
|
|
{
|
|
|
|
|
"cells": [
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "header",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"# PaddleOCR Hyperparameter Optimization via REST API\n",
|
|
|
|
|
"\n",
|
2026-01-18 18:03:23 +01:00
|
|
|
"Uses Ray Tune + Optuna to find optimal PaddleOCR parameters.\n",
|
2026-01-18 08:19:34 +01:00
|
|
|
"\n",
|
|
|
|
|
"## Prerequisites\n",
|
|
|
|
|
"\n",
"Start the PaddleOCR worker containers before running the cells below:\n",
"\n",
|
|
|
|
|
"```bash\n",
|
|
|
|
|
"cd src/paddle_ocr\n",
|
2026-01-18 18:03:23 +01:00
|
|
|
"docker compose -f docker-compose.workers.yml up  # GPU workers on ports 8001-8002\n",
|
|
|
|
|
"# or: docker compose -f docker-compose.workers.yml --profile cpu up\n",
|
2026-01-18 08:19:34 +01:00
|
|
|
"```"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
2026-01-18 18:03:23 +01:00
|
|
|
"id": "deps",
|
2026-01-18 08:19:34 +01:00
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
2026-01-18 18:03:23 +01:00
|
|
|
"%pip install -q -U \"ray[tune]\" optuna requests pandas"
|
2026-01-18 08:19:34 +01:00
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
2026-01-18 18:03:23 +01:00
|
|
|
"id": "setup",
|
2026-01-18 08:19:34 +01:00
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
2026-01-18 18:03:23 +01:00
|
|
|
"from raytune_ocr import (\n",
|
|
|
|
|
" check_workers, create_trainable, run_tuner, analyze_results, correlation_analysis,\n",
|
|
|
|
|
" paddle_ocr_payload, PADDLE_OCR_SEARCH_SPACE, PADDLE_OCR_CONFIG_KEYS,\n",
|
|
|
|
|
")\n",
|
2026-01-18 08:19:34 +01:00
|
|
|
"\n",
|
2026-01-18 18:03:23 +01:00
|
|
|
"# Ports of the PaddleOCR workers started in the Prerequisites step\n",
|
|
|
|
|
"PORTS = [8001, 8002]\n",
|
2026-01-18 08:19:34 +01:00
|
|
|
"\n",
|
2026-01-18 18:03:23 +01:00
|
|
|
"# Check that the workers are running\n",
|
|
|
|
|
"healthy = check_workers(PORTS, \"PaddleOCR\")"
|
2026-01-18 08:19:34 +01:00
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
2026-01-18 18:03:23 +01:00
|
|
|
"id": "tune",
|
2026-01-18 08:19:34 +01:00
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
2026-01-18 18:03:23 +01:00
|
|
|
"# Create trainable and run tuning\n",
|
|
|
|
|
"trainable = create_trainable(PORTS, paddle_ocr_payload)\n",
|
2026-01-18 08:19:34 +01:00
|
|
|
"\n",
|
2026-01-18 18:03:23 +01:00
|
|
|
"results = run_tuner(\n",
|
|
|
|
|
" trainable=trainable,\n",
|
|
|
|
|
" search_space=PADDLE_OCR_SEARCH_SPACE,\n",
|
|
|
|
|
" num_samples=64,\n",
|
|
|
|
|
" num_workers=len(healthy),\n",
|
|
|
|
|
")"
|
2026-01-18 08:19:34 +01:00
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
2026-01-18 18:03:23 +01:00
|
|
|
"id": "analysis",
|
2026-01-18 08:19:34 +01:00
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
2026-01-18 18:03:23 +01:00
|
|
|
"# Analyze results\n",
|
|
|
|
|
"df = analyze_results(\n",
|
|
|
|
|
" results,\n",
|
|
|
|
|
" prefix=\"raytune_paddle\",\n",
|
|
|
|
|
" config_keys=PADDLE_OCR_CONFIG_KEYS,\n",
|
|
|
|
|
")\n",
|
2026-01-18 08:19:34 +01:00
|
|
|
"\n",
|
2026-01-18 18:03:23 +01:00
|
|
|
"df.describe()"
|
2026-01-18 08:19:34 +01:00
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"id": "correlation",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"# Correlation analysis\n",
|
2026-01-18 18:03:23 +01:00
|
|
|
"correlation_analysis(df, PADDLE_OCR_CONFIG_KEYS)"
|
2026-01-18 08:19:34 +01:00
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"metadata": {
|
|
|
|
|
"kernelspec": {
|
2026-01-18 18:03:23 +01:00
|
|
|
"display_name": "Python 3",
|
2026-01-18 08:19:34 +01:00
|
|
|
"language": "python",
|
|
|
|
|
"name": "python3"
|
|
|
|
|
},
|
|
|
|
|
"language_info": {
|
|
|
|
|
"name": "python",
|
2026-01-18 18:03:23 +01:00
|
|
|
"version": "3.10.0"
|
2026-01-18 08:19:34 +01:00
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
"nbformat_minor": 5
|
2026-01-18 18:03:23 +01:00
|
|
|
}
|