Files
MastersThesis/src/paddle_ocr_raytune_rest.ipynb

111 lines
2.4 KiB
Plaintext
Raw Normal View History

2026-01-18 08:19:34 +01:00
{
"cells": [
{
"cell_type": "markdown",
"id": "header",
"metadata": {},
"source": [
"# PaddleOCR Hyperparameter Optimization via REST API\n",
"\n",
2026-01-18 18:03:23 +01:00
"Uses Ray Tune + Optuna to find optimal PaddleOCR parameters.\n",
2026-01-18 08:19:34 +01:00
"\n",
"## Prerequisites\n",
"\n",
"```bash\n",
"cd src/paddle_ocr\n",
2026-01-18 18:03:23 +01:00
"docker compose -f docker-compose.workers.yml up # GPU workers on 8001-8002\n",
"# or: docker compose -f docker-compose.workers.yml --profile cpu up\n",
2026-01-18 08:19:34 +01:00
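"```\n",
"\n",
"The notebook also imports helpers from `raytune_ocr`, which the install cell below does not provide, so that module must be importable from the kernel's working directory or `PYTHONPATH`."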
]
},
{
"cell_type": "code",
"execution_count": null,
2026-01-18 18:03:23 +01:00
"id": "deps",
2026-01-18 08:19:34 +01:00
"metadata": {},
"outputs": [],
"source": [
2026-01-18 18:03:23 +01:00
"# Install or upgrade the tuning dependencies; %pip targets the running kernel's environment\n",
"%pip install --quiet --upgrade \"ray[tune]\" optuna requests pandas"
2026-01-18 08:19:34 +01:00
]
},
{
"cell_type": "code",
"execution_count": null,
2026-01-18 18:03:23 +01:00
"id": "setup",
2026-01-18 08:19:34 +01:00
"metadata": {},
"outputs": [],
"source": [
2026-01-18 18:03:23 +01:00
"from raytune_ocr import (\n",
" check_workers, create_trainable, run_tuner, analyze_results, correlation_analysis,\n",
" paddle_ocr_payload, PADDLE_OCR_SEARCH_SPACE, PADDLE_OCR_CONFIG_KEYS,\n",
")\n",
2026-01-18 08:19:34 +01:00
"\n",
2026-01-18 18:03:23 +01:00
"# Host ports of the PaddleOCR workers; must match the ports published by docker-compose.workers.yml\n",
"PORTS = [8001, 8002]\n",
2026-01-18 08:19:34 +01:00
"\n",
2026-01-18 18:03:23 +01:00
"# Check which workers are running on PORTS.\n",
"healthy = check_workers(PORTS, \"PaddleOCR\")\n",
"# len(healthy) is passed to run_tuner as num_workers later on, so fail fast here\n",
"# rather than starting a tuning run with zero workers.\n",
"assert len(healthy) > 0, f\"No healthy PaddleOCR workers on ports {PORTS}; start them before tuning\""
2026-01-18 08:19:34 +01:00
]
},
{
"cell_type": "code",
"execution_count": null,
2026-01-18 18:03:23 +01:00
"id": "tune",
2026-01-18 08:19:34 +01:00
"metadata": {},
"outputs": [],
"source": [
2026-01-18 18:03:23 +01:00
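"# Create trainable and run tuning\n",
"# NOTE(review): the trainable is built from all PORTS, while num_workers below uses len(healthy) - confirm unhealthy ports are handled\n",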
"trainable = create_trainable(PORTS, paddle_ocr_payload)\n",
2026-01-18 08:19:34 +01:00
"\n",
2026-01-18 18:03:23 +01:00
"# Trial budget handed to run_tuner as num_samples; named so it is easy to adjust\n",
"NUM_SAMPLES = 64\n",
"\n",
"results = run_tuner(\n",
"    trainable=trainable,\n",
"    search_space=PADDLE_OCR_SEARCH_SPACE,\n",
"    num_samples=NUM_SAMPLES,\n",
"    num_workers=len(healthy),\n",
")"
2026-01-18 08:19:34 +01:00
]
},
{
"cell_type": "code",
"execution_count": null,
2026-01-18 18:03:23 +01:00
"id": "analysis",
2026-01-18 08:19:34 +01:00
"metadata": {},
"outputs": [],
"source": [
2026-01-18 18:03:23 +01:00
"# Run the analysis helper on the tuning results; `df` is summarised below and reused by the correlation cell\n",
"df = analyze_results(results, prefix=\"raytune_paddle\", config_keys=PADDLE_OCR_CONFIG_KEYS)\n",
2026-01-18 08:19:34 +01:00
"\n",
2026-01-18 18:03:23 +01:00
"df.describe()"
2026-01-18 08:19:34 +01:00
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "correlation",
"metadata": {},
"outputs": [],
"source": [
"# Correlation analysis\n",
2026-01-18 18:03:23 +01:00
"correlation_analysis(df, PADDLE_OCR_CONFIG_KEYS)"
2026-01-18 08:19:34 +01:00
]
}
],
"metadata": {
"kernelspec": {
2026-01-18 18:03:23 +01:00
"display_name": "Python 3",
2026-01-18 08:19:34 +01:00
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
2026-01-18 18:03:23 +01:00
"version": "3.10.0"
2026-01-18 08:19:34 +01:00
}
},
"nbformat": 4,
"nbformat_minor": 5
2026-01-18 18:03:23 +01:00
}