{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "header",
   "metadata": {},
   "source": [
    "# DocTR Hyperparameter Optimization via REST API\n",
    "\n",
    "Uses Ray Tune + Optuna to find optimal DocTR parameters.\n",
    "\n",
    "## Prerequisites\n",
    "\n",
    "```bash\n",
    "cd src/doctr_service\n",
    "docker compose up ocr-cpu # or ocr-gpu\n",
    "```\n",
    "\n",
    "Service runs on port 8003."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "deps-header",
   "metadata": {},
   "source": [
    "## Install dependencies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "deps",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pin Ray version for API stability (tune.report takes dict, not kwargs in 2.x)\n",
    "# NOTE(review): optuna, requests and pandas are unpinned; pin them once known-good versions are confirmed.\n",
    "%pip install -q \"ray[tune]==2.53.0\" optuna requests pandas"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "setup-header",
   "metadata": {},
   "source": [
    "## Setup\n",
    "\n",
    "Import the tuning helpers from `raytune_ocr`, define the run configuration, and check that the DocTR workers are running.\n",
    "\n",
    "`raytune_ocr` is not installed by the cell above, so it must already be importable from the kernel."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "setup",
   "metadata": {},
   "outputs": [],
   "source": [
    "from raytune_ocr import (\n",
    "    check_workers, create_trainable, run_tuner, analyze_results, correlation_analysis,\n",
    "    doctr_payload, DOCTR_SEARCH_SPACE, DOCTR_CONFIG_KEYS,\n",
    ")\n",
    "\n",
    "# Worker ports (8003 is the service port listed under Prerequisites)\n",
    "PORTS = [8003]\n",
    "\n",
    "# Passed to run_tuner as num_samples in the tuning cell\n",
    "NUM_SAMPLES = 64\n",
    "\n",
    "# Check workers are running\n",
    "healthy = check_workers(PORTS, \"DocTR\")\n",
    "\n",
    "# Fail fast: the tuning cell passes len(healthy) as num_workers, so an empty\n",
    "# result would otherwise request a run with zero workers.\n",
    "if len(healthy) == 0:\n",
    "    raise RuntimeError(\n",
    "        f\"No healthy DocTR workers on ports {PORTS}; \"\n",
    "        \"start the service (see Prerequisites) before tuning.\"\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "tune-header",
   "metadata": {},
   "source": [
    "## Run tuning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "tune",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create trainable and run tuning\n",
    "# NOTE(review): the trainable is built from PORTS while num_workers counts only\n",
    "# the healthy workers; confirm this is intended if some ports can be unhealthy.\n",
    "trainable = create_trainable(PORTS, doctr_payload)\n",
    "\n",
    "results = run_tuner(\n",
    "    trainable=trainable,\n",
    "    search_space=DOCTR_SEARCH_SPACE,\n",
    "    num_samples=NUM_SAMPLES,\n",
    "    num_workers=len(healthy),\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "analysis-header",
   "metadata": {},
   "source": [
    "## Analyze results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "analysis",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Analyze results\n",
    "# NOTE(review): prefix is presumably used to name saved artifacts; confirm in raytune_ocr.\n",
    "df = analyze_results(\n",
    "    results,\n",
    "    prefix=\"raytune_doctr\",\n",
    "    config_keys=DOCTR_CONFIG_KEYS,\n",
    ")\n",
    "\n",
    "df.describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "correlation-header",
   "metadata": {},
   "source": [
    "## Correlation analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "correlation",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Correlation analysis\n",
    "correlation_analysis(df, DOCTR_CONFIG_KEYS)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}