112 lines
2.4 KiB
Plaintext
112 lines
2.4 KiB
Plaintext
|
|
{
|
||
|
|
"cells": [
|
||
|
|
{
|
||
|
|
"cell_type": "markdown",
|
||
|
|
"id": "header",
|
||
|
|
"metadata": {},
|
||
|
|
"source": [
|
||
|
|
"# DocTR Hyperparameter Optimization via REST API\n",
|
||
|
|
"\n",
|
||
|
|
"Uses Ray Tune with Optuna to search for the best DocTR parameters by sending trial configurations to a running DocTR service over its REST API.\n",
|
||
|
|
"\n",
|
||
|
|
"## Prerequisites\n",
|
||
|
|
"\n",
|
||
|
|
"```bash\n",
|
||
|
|
"cd src/doctr_service\n",
|
||
|
|
"docker compose up ocr-cpu # or ocr-gpu\n",
|
||
|
|
"```\n",
|
||
|
|
"\n",
|
||
|
|
"The service listens on port 8003; this must match the `PORTS` list in the setup cell below."
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": null,
|
||
|
|
"id": "deps",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [],
|
||
|
|
"source": [
|
||
|
|
"%pip install -q -U \"ray[tune]\" optuna requests pandas"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": null,
|
||
|
|
"id": "setup",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [],
|
||
|
|
"source": [
|
||
|
|
"from raytune_ocr import (\n",
|
||
|
|
" check_workers, create_trainable, run_tuner, analyze_results, correlation_analysis,\n",
|
||
|
|
" doctr_payload, DOCTR_SEARCH_SPACE, DOCTR_CONFIG_KEYS,\n",
|
||
|
|
")\n",
|
||
|
|
"\n",
|
||
|
|
"# Worker ports\n",
|
||
|
|
"PORTS = [8003]\n",
|
||
|
|
"\n",
|
||
|
|
"# Check workers are running\n",
|
||
|
|
"healthy = check_workers(PORTS, \"DocTR\")"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": null,
|
||
|
|
"id": "tune",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [],
|
||
|
|
"source": [
|
||
|
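|
"# Create the trainable and run the tuning job.\n",
"# NOTE(review): the trainable is built from every port in PORTS, while num_workers counts only\n",
"# the entries in `healthy`. Confirm whether create_trainable should receive `healthy` instead.\n",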
|
||
|
|
"trainable = create_trainable(PORTS, doctr_payload)\n",
|
||
|
|
"\n",
|
||
|
|
"results = run_tuner(\n",
|
||
|
|
" trainable=trainable,\n",
|
||
|
|
" search_space=DOCTR_SEARCH_SPACE,\n",
|
||
|
|
" num_samples=64,\n",
|
||
|
|
" num_workers=len(healthy),\n",
|
||
|
|
")"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": null,
|
||
|
|
"id": "analysis",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [],
|
||
|
|
"source": [
|
||
|
|
"# Analyze results\n",
|
||
|
|
"df = analyze_results(\n",
|
||
|
|
" results,\n",
|
||
|
|
" prefix=\"raytune_doctr\",\n",
|
||
|
|
" config_keys=DOCTR_CONFIG_KEYS,\n",
|
||
|
|
")\n",
|
||
|
|
"\n",
|
||
|
|
"df.describe()"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": null,
|
||
|
|
"id": "correlation",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [],
|
||
|
|
"source": [
|
||
|
|
"# Correlation analysis\n",
|
||
|
|
"correlation_analysis(df, DOCTR_CONFIG_KEYS)"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"kernelspec": {
|
||
|
|
"display_name": "Python 3",
|
||
|
|
"language": "python",
|
||
|
|
"name": "python3"
|
||
|
|
},
|
||
|
|
"language_info": {
|
||
|
|
"name": "python",
|
||
|
|
"version": "3.10.0"
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"nbformat": 4,
|
||
|
|
"nbformat_minor": 5
|
||
|
|
}
|