Files
MastersThesis/src/doctr_raytune_rest.ipynb

109 lines
2.5 KiB
Plaintext
Raw Normal View History

2026-01-18 18:03:23 +01:00
{
"cells": [
{
"cell_type": "markdown",
"id": "header",
"metadata": {},
"source": [
"# DocTR Hyperparameter Optimization via REST API\n",
"\n",
"Uses Ray Tune + Optuna to find optimal DocTR parameters.\n",
"\n",
"## Prerequisites\n",
"\n",
"```bash\n",
"cd src/doctr_service\n",
"docker compose up ocr-cpu # or ocr-gpu\n",
"```\n",
"\n",
"The service listens on port 8003, which must match `PORTS` in the setup cell."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "deps",
"metadata": {},
"outputs": [],
2026-01-18 18:43:16 +01:00
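"source": "# Pin Ray for API stability: in Ray 2.x, tune.report() takes a metrics dict, not keyword arguments.\n# NOTE: only Ray is pinned here; optuna, requests and pandas are installed unpinned.\n%pip install -q \"ray[tune]==2.53.0\" optuna requests pandas"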
2026-01-18 18:03:23 +01:00
},
{
"cell_type": "code",
"execution_count": null,
"id": "setup",
"metadata": {},
"outputs": [],
"source": [
"from raytune_ocr import (\n",
" check_workers, create_trainable, run_tuner, analyze_results, correlation_analysis,\n",
" doctr_payload, DOCTR_SEARCH_SPACE, DOCTR_CONFIG_KEYS,\n",
")\n",
"\n",
"# Worker ports\n",
"PORTS = [8003]\n",
"\n",
"# Check workers are running\n",
"healthy = check_workers(PORTS, \"DocTR\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "tune",
"metadata": {},
"outputs": [],
"source": [
"# Create trainable and run tuning\n",
"trainable = create_trainable(PORTS, doctr_payload)\n",
"\n",
"results = run_tuner(\n",
" trainable=trainable,\n",
" search_space=DOCTR_SEARCH_SPACE,\n",
" num_samples=64,\n",
" num_workers=len(healthy),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "analysis",
"metadata": {},
"outputs": [],
"source": [
"# Analyze results\n",
"df = analyze_results(\n",
" results,\n",
" prefix=\"raytune_doctr\",\n",
" config_keys=DOCTR_CONFIG_KEYS,\n",
")\n",
"\n",
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "correlation",
"metadata": {},
"outputs": [],
"source": [
"# Correlation analysis\n",
"correlation_analysis(df, DOCTR_CONFIG_KEYS)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.10.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
2026-01-18 18:43:16 +01:00
}