Some checks failed
build_docker / build_doctr (linux/amd64) (pull_request) Has been cancelled
build_docker / build_doctr (linux/arm64) (pull_request) Has been cancelled
build_docker / essential (pull_request) Has been cancelled
build_docker / build_cpu (linux/amd64) (pull_request) Has been cancelled
build_docker / build_cpu (linux/arm64) (pull_request) Has been cancelled
build_docker / build_gpu (linux/amd64) (pull_request) Has been cancelled
build_docker / build_gpu (linux/arm64) (pull_request) Has been cancelled
build_docker / manifest_cpu (pull_request) Has been cancelled
build_docker / manifest_gpu (pull_request) Has been cancelled
build_docker / build_easyocr (linux/amd64) (pull_request) Has been cancelled
build_docker / build_easyocr (linux/arm64) (pull_request) Has been cancelled
build_docker / build_doctr_gpu (linux/arm64) (pull_request) Has been cancelled
build_docker / manifest_easyocr_gpu (pull_request) Has been cancelled
build_docker / manifest_doctr_gpu (pull_request) Has been cancelled
build_docker / manifest_easyocr (pull_request) Has been cancelled
build_docker / manifest_doctr (pull_request) Has been cancelled
build_docker / build_easyocr_gpu (linux/amd64) (pull_request) Has been cancelled
build_docker / build_easyocr_gpu (linux/arm64) (pull_request) Has been cancelled
build_docker / build_doctr_gpu (linux/amd64) (pull_request) Has been cancelled
109 lines
2.5 KiB
Plaintext
109 lines
2.5 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "header",
|
|
"metadata": {},
|
|
"source": [
|
|
"# DocTR Hyperparameter Optimization via REST API\n",
|
|
"\n",
|
|
"Uses Ray Tune + Optuna to find optimal DocTR parameters.\n",
|
|
"\n",
|
|
"## Prerequisites\n",
|
|
"\n",
|
|
"```bash\n",
|
|
"cd src/doctr_service\n",
|
|
"docker compose up ocr-cpu # or ocr-gpu\n",
|
|
"```\n",
|
|
"\n",
|
|
"The service listens on port 8003 (the port listed in `PORTS` in the setup cell)."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "deps",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": "# Pin the Ray version for API stability: in Ray 2.x, tune.report takes a metrics dict rather than keyword arguments\n%pip install -q \"ray[tune]==2.53.0\" optuna requests pandas"
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "setup",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from raytune_ocr import (\n",
|
|
" check_workers, create_trainable, run_tuner, analyze_results, correlation_analysis,\n",
|
|
" doctr_payload, DOCTR_SEARCH_SPACE, DOCTR_CONFIG_KEYS,\n",
|
|
")\n",
|
|
"\n",
|
|
"# Worker ports\n",
|
|
"PORTS = [8003]\n",
|
|
"\n",
|
|
"# Check workers are running\n",
|
|
"healthy = check_workers(PORTS, \"DocTR\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "tune",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Create trainable and run tuning\n",
|
|
"trainable = create_trainable(PORTS, doctr_payload)\n",
|
|
"\n",
|
|
"results = run_tuner(\n",
|
|
" trainable=trainable,\n",
|
|
" search_space=DOCTR_SEARCH_SPACE,\n",
|
|
" num_samples=64,\n",
|
|
" num_workers=len(healthy),\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "analysis",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Analyze results\n",
|
|
"df = analyze_results(\n",
|
|
" results,\n",
|
|
" prefix=\"raytune_doctr\",\n",
|
|
" config_keys=DOCTR_CONFIG_KEYS,\n",
|
|
")\n",
|
|
"\n",
|
|
"df.describe()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "correlation",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Correlation analysis\n",
|
|
"correlation_analysis(df, DOCTR_CONFIG_KEYS)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"name": "python",
|
|
"version": "3.10.0"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
} |