remove unneeded py file
All checks were successful
build_docker / manifest_easyocr_gpu (pull_request) Successful in 22s
build_docker / build_doctr_gpu (linux/arm64) (pull_request) Successful in 52m54s
build_docker / manifest_doctr_gpu (pull_request) Successful in 22s
build_docker / essential (pull_request) Successful in 1s
build_docker / build_cpu (linux/amd64) (pull_request) Successful in 5m4s
build_docker / build_cpu (linux/arm64) (pull_request) Successful in 22m30s
build_docker / build_gpu (linux/amd64) (pull_request) Successful in 21m10s
build_docker / build_easyocr (linux/amd64) (pull_request) Successful in 14m38s
build_docker / build_gpu (linux/arm64) (pull_request) Successful in 19m36s
build_docker / build_doctr (linux/amd64) (pull_request) Successful in 15m11s
build_docker / build_easyocr (linux/arm64) (pull_request) Successful in 19m55s
build_docker / build_easyocr_gpu (linux/amd64) (pull_request) Successful in 16m53s
build_docker / build_doctr (linux/arm64) (pull_request) Successful in 22m41s
build_docker / build_doctr_gpu (linux/amd64) (pull_request) Successful in 16m30s
build_docker / build_easyocr_gpu (linux/arm64) (pull_request) Successful in 54m14s
build_docker / manifest_cpu (pull_request) Successful in 23s
build_docker / manifest_gpu (pull_request) Successful in 24s
build_docker / manifest_easyocr (pull_request) Successful in 21s
build_docker / manifest_doctr (pull_request) Successful in 21s

This commit is contained in:
2026-01-18 08:24:23 +01:00
parent 67092e4df0
commit b29df98602
3 changed files with 129 additions and 3264 deletions

View File

@@ -188,62 +188,7 @@
"id": "trainable",
"metadata": {},
"outputs": [],
"source": [
"def trainable_paddle_ocr(config):\n",
" \"\"\"Call PaddleOCR REST API with the given hyperparameter config.\n",
" \n",
" Uses trial index to deterministically assign a worker (round-robin),\n",
" ensuring only 1 request per container at a time.\n",
" \"\"\"\n",
" import requests # Must be inside function for Ray workers\n",
" from ray import train\n",
"\n",
" # Worker URLs - round-robin assignment based on trial index\n",
" WORKER_PORTS = [8001, 8002]\n",
" NUM_WORKERS = len(WORKER_PORTS)\n",
" \n",
" # Get trial context for deterministic worker assignment\n",
" context = train.get_context()\n",
" trial_id = context.get_trial_id() if context else \"0\"\n",
" # Extract numeric part from trial ID (e.g., \"trainable_paddle_ocr_abc123_00001\" -> 1)\n",
" try:\n",
" trial_num = int(trial_id.split(\"_\")[-1])\n",
" except (ValueError, IndexError):\n",
" trial_num = hash(trial_id)\n",
" \n",
" worker_idx = trial_num % NUM_WORKERS\n",
" api_url = f\"http://localhost:{WORKER_PORTS[worker_idx]}\"\n",
"\n",
" payload = {\n",
" \"pdf_folder\": \"/app/dataset\",\n",
" \"use_doc_orientation_classify\": config.get(\"use_doc_orientation_classify\", False),\n",
" \"use_doc_unwarping\": config.get(\"use_doc_unwarping\", False),\n",
" \"textline_orientation\": config.get(\"textline_orientation\", True),\n",
" \"text_det_thresh\": config.get(\"text_det_thresh\", 0.0),\n",
" \"text_det_box_thresh\": config.get(\"text_det_box_thresh\", 0.0),\n",
" \"text_det_unclip_ratio\": config.get(\"text_det_unclip_ratio\", 1.5),\n",
" \"text_rec_score_thresh\": config.get(\"text_rec_score_thresh\", 0.0),\n",
" \"start_page\": 5,\n",
" \"end_page\": 10,\n",
" }\n",
"\n",
" try:\n",
" response = requests.post(f\"{api_url}/evaluate\", json=payload, timeout=None) # No timeout\n",
" response.raise_for_status()\n",
" metrics = response.json()\n",
" metrics[\"worker\"] = api_url\n",
" train.report(metrics)\n",
" except Exception as e:\n",
" train.report({\n",
" \"CER\": 1.0,\n",
" \"WER\": 1.0,\n",
" \"TIME\": 0.0,\n",
" \"PAGES\": 0,\n",
" \"TIME_PER_PAGE\": 0,\n",
" \"worker\": api_url,\n",
" \"ERROR\": str(e)[:500]\n",
" })"
]
"source": "def trainable_paddle_ocr(config):\n \"\"\"Call PaddleOCR REST API with the given hyperparameter config.\"\"\"\n import random\n import requests\n from ray import tune\n\n # Worker URLs - random selection (load balances with 2 workers, 2 concurrent trials)\n WORKER_PORTS = [8001, 8002]\n api_url = f\"http://localhost:{random.choice(WORKER_PORTS)}\"\n\n payload = {\n \"pdf_folder\": \"/app/dataset\",\n \"use_doc_orientation_classify\": config.get(\"use_doc_orientation_classify\", False),\n \"use_doc_unwarping\": config.get(\"use_doc_unwarping\", False),\n \"textline_orientation\": config.get(\"textline_orientation\", True),\n \"text_det_thresh\": config.get(\"text_det_thresh\", 0.0),\n \"text_det_box_thresh\": config.get(\"text_det_box_thresh\", 0.0),\n \"text_det_unclip_ratio\": config.get(\"text_det_unclip_ratio\", 1.5),\n \"text_rec_score_thresh\": config.get(\"text_rec_score_thresh\", 0.0),\n \"start_page\": 5,\n \"end_page\": 10,\n }\n\n try:\n response = requests.post(f\"{api_url}/evaluate\", json=payload, timeout=None)\n response.raise_for_status()\n metrics = response.json()\n metrics[\"worker\"] = api_url\n tune.report(**metrics)\n except Exception as e:\n tune.report(\n CER=1.0,\n WER=1.0,\n TIME=0.0,\n PAGES=0,\n TIME_PER_PAGE=0,\n worker=api_url,\n ERROR=str(e)[:500]\n )"
},
{
"cell_type": "markdown",
@@ -390,4 +335,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}