Documentation review and data consistency.
Some checks failed
build_docker / essential (push) Successful in 0s
build_docker / build_paddle_ocr (push) Successful in 4m57s
build_docker / build_raytune (push) Has been cancelled
build_docker / build_easyocr_gpu (push) Has been cancelled
build_docker / build_doctr (push) Has been cancelled
build_docker / build_doctr_gpu (push) Has been cancelled
build_docker / build_paddle_ocr_gpu (push) Has been cancelled
build_docker / build_easyocr (push) Has been cancelled
Some checks failed
build_docker / essential (push) Successful in 0s
build_docker / build_paddle_ocr (push) Successful in 4m57s
build_docker / build_raytune (push) Has been cancelled
build_docker / build_easyocr_gpu (push) Has been cancelled
build_docker / build_doctr (push) Has been cancelled
build_docker / build_doctr_gpu (push) Has been cancelled
build_docker / build_paddle_ocr_gpu (push) Has been cancelled
build_docker / build_easyocr (push) Has been cancelled
This commit is contained in:
@@ -95,6 +95,25 @@ Results are saved to `src/results/` as CSV files:
|
||||
- `raytune_doctr_results_<timestamp>.csv`
|
||||
- `raytune_easyocr_results_<timestamp>.csv`
|
||||
|
||||
### Correlation Analysis
|
||||
|
||||
Correlation tables used in the thesis are derived from the CSV results with a local script:
|
||||
|
||||
```bash
source .venv/bin/activate
python tem/scripts/compute_correlations_all.py
```
|
||||
|
||||
Outputs are written to `src/results/correlations/`:
- `paddle_correlations.csv`
- `doctr_correlations.csv`
- `easyocr_correlations.csv`
|
||||
|
||||
Each correlation file is computed from the corresponding Ray Tune results CSV:
- `src/results/raytune_paddle_results_20260119_122609.csv`
- `src/results/raytune_doctr_results_20260119_121445.csv`
- `src/results/raytune_easyocr_results_20260119_120204.csv`
|
||||
|
||||
## Directory Structure
|
||||
|
||||
```
|
||||
|
||||
1129
src/archived/ocr_benchmark_notebook.ipynb
Normal file
1129
src/archived/ocr_benchmark_notebook.ipynb
Normal file
File diff suppressed because one or more lines are too long
1285
src/archived/paddle_ocr_fine_tune_unir.ipynb
Normal file
1285
src/archived/paddle_ocr_fine_tune_unir.ipynb
Normal file
File diff suppressed because one or more lines are too long
@@ -1,82 +0,0 @@
|
||||
# docker-compose.tuning.yml - Ray Tune with all OCR services (PaddleOCR + DocTR)
# Usage:
#   docker compose -f docker-compose.tuning.yml up -d paddle-ocr-gpu doctr-gpu
#   docker compose -f docker-compose.tuning.yml run raytune --service paddle --samples 64
#   docker compose -f docker-compose.tuning.yml run raytune --service doctr --samples 64
#   docker compose -f docker-compose.tuning.yml down
#
# Note: EasyOCR uses port 8002 (same as PaddleOCR). Use docker-compose.tuning.easyocr.yml separately.

services:
  # Ray Tune driver. Run on demand via `docker compose run raytune ...` (see Usage).
  raytune:
    image: seryus.ddns.net/unir/raytune:latest
    # NOTE(review): host networking presumably lets the tuner reach the OCR
    # services on their host-published ports (8002 / 8003) - confirm.
    network_mode: host
    shm_size: '5gb'
    volumes:
      # Tuning results are written back to the host checkout.
      - ./results:/app/results:rw
    environment:
      - PYTHONUNBUFFERED=1

  # PaddleOCR GPU service, published on host port 8002.
  paddle-ocr-gpu:
    image: seryus.ddns.net/unir/paddle-ocr-gpu:latest
    container_name: paddle-ocr-gpu-tuning
    ports:
      - "8002:8000"
    volumes:
      - ./dataset:/app/dataset:ro
      - ./debugset:/app/debugset:rw
      # Named volume (declared at the bottom of this file) mounted at /root/.paddlex.
      - paddlex-cache:/root/.paddlex
    environment:
      - PYTHONUNBUFFERED=1
      - CUDA_VISIBLE_DEVICES=0
      - PADDLE_DET_MODEL=PP-OCRv5_mobile_det
      - PADDLE_REC_MODEL=PP-OCRv5_mobile_rec
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU for this container.
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    healthcheck:
      # Probes the /health endpoint on the container-internal port 8000.
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

  # DocTR GPU service, published on host port 8003.
  doctr-gpu:
    image: seryus.ddns.net/unir/doctr-gpu:latest
    container_name: doctr-gpu-tuning
    ports:
      - "8003:8000"
    volumes:
      - ./dataset:/app/dataset:ro
      - ./debugset:/app/debugset:rw
      # Named volume (declared at the bottom of this file) mounted at /root/.cache/doctr.
      - doctr-cache:/root/.cache/doctr
    environment:
      - PYTHONUNBUFFERED=1
      # NOTE(review): same device index as paddle-ocr-gpu; when both services
      # are up they presumably share one GPU - confirm this is intended.
      - CUDA_VISIBLE_DEVICES=0
      - DOCTR_DET_ARCH=db_resnet50
      - DOCTR_RECO_ARCH=crnn_vgg16_bn
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU for this container.
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    healthcheck:
      # Probes the /health endpoint on the container-internal port 8000.
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Longer grace period than paddle-ocr-gpu (180s vs 60s).
      start_period: 180s

# Named volumes with fixed names, so they are not prefixed with the Compose project name.
volumes:
  paddlex-cache:
    name: paddlex-model-cache
  doctr-cache:
    name: doctr-model-cache
|
||||
Reference in New Issue
Block a user