Documentation review and data consistency.
Some checks failed
build_docker / essential (push) Successful in 0s
build_docker / build_paddle_ocr (push) Successful in 4m57s
build_docker / build_raytune (push) Has been cancelled
build_docker / build_easyocr_gpu (push) Has been cancelled
build_docker / build_doctr (push) Has been cancelled
build_docker / build_doctr_gpu (push) Has been cancelled
build_docker / build_paddle_ocr_gpu (push) Has been cancelled
build_docker / build_easyocr (push) Has been cancelled

This commit is contained in:
2026-01-24 15:53:34 +01:00
parent 9ee2490097
commit 0089b34cb3
48 changed files with 1030 additions and 930 deletions

View File

@@ -95,6 +95,25 @@ Results are saved to `src/results/` as CSV files:
- `raytune_doctr_results_<timestamp>.csv`
- `raytune_easyocr_results_<timestamp>.csv`
### Correlation Analysis
Correlation tables used in the thesis are derived from the CSV results with a local script:
```bash
source .venv/bin/activate
python tem/scripts/compute_correlations_all.py
```
Outputs are written to `src/results/correlations/`:
- `paddle_correlations.csv`
- `doctr_correlations.csv`
- `easyocr_correlations.csv`
Each correlation file is computed from the matching Ray Tune results CSV (listed in the same engine order):
- `src/results/raytune_paddle_results_20260119_122609.csv`
- `src/results/raytune_doctr_results_20260119_121445.csv`
- `src/results/raytune_easyocr_results_20260119_120204.csv`
## Directory Structure
```

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -1,82 +0,0 @@
# docker-compose.tuning.yml - Ray Tune with all OCR services (PaddleOCR + DocTR)
# Usage:
# docker compose -f docker-compose.tuning.yml up -d paddle-ocr-gpu doctr-gpu
# docker compose -f docker-compose.tuning.yml run raytune --service paddle --samples 64
# docker compose -f docker-compose.tuning.yml run raytune --service doctr --samples 64
# docker compose -f docker-compose.tuning.yml down
#
# Note: EasyOCR uses port 8002 (same as PaddleOCR). Use docker-compose.tuning.easyocr.yml separately.
services:
  # Ray Tune driver. It publishes no ports of its own and uses the host
  # network stack; presumably this is how it reaches the OCR services on
  # their published host ports (8002 / 8003) -- confirm against the tuner.
  raytune:
    image: 'seryus.ddns.net/unir/raytune:latest'
    network_mode: 'host'
    shm_size: '5gb'
    environment:
      PYTHONUNBUFFERED: '1'
    volumes:
      # Tuning results are written back to the host checkout.
      - './results:/app/results:rw'

  # PaddleOCR GPU service: container port 8000 is published on host port 8002.
  paddle-ocr-gpu:
    image: 'seryus.ddns.net/unir/paddle-ocr-gpu:latest'
    container_name: 'paddle-ocr-gpu-tuning'
    restart: 'unless-stopped'
    ports:
      - '8002:8000'
    environment:
      PYTHONUNBUFFERED: '1'
      CUDA_VISIBLE_DEVICES: '0'
      PADDLE_DET_MODEL: 'PP-OCRv5_mobile_det'
      PADDLE_REC_MODEL: 'PP-OCRv5_mobile_rec'
    volumes:
      # The dataset is mounted read-only; debugset is writable.
      - './dataset:/app/dataset:ro'
      - './debugset:/app/debugset:rw'
      # Named volume declared under the top-level `volumes` key.
      - 'paddlex-cache:/root/.paddlex'
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU for this container.
            - driver: 'nvidia'
              count: 1
              capabilities:
                - 'gpu'
    healthcheck:
      # Runs inside the container and requests /health on port 8000.
      test:
        - 'CMD'
        - 'python'
        - '-c'
        - "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"
      interval: '30s'
      timeout: '10s'
      retries: 3
      start_period: '60s'

  # DocTR GPU service: container port 8000 is published on host port 8003.
  doctr-gpu:
    image: 'seryus.ddns.net/unir/doctr-gpu:latest'
    container_name: 'doctr-gpu-tuning'
    restart: 'unless-stopped'
    ports:
      - '8003:8000'
    environment:
      PYTHONUNBUFFERED: '1'
      CUDA_VISIBLE_DEVICES: '0'
      DOCTR_DET_ARCH: 'db_resnet50'
      DOCTR_RECO_ARCH: 'crnn_vgg16_bn'
    volumes:
      # The dataset is mounted read-only; debugset is writable.
      - './dataset:/app/dataset:ro'
      - './debugset:/app/debugset:rw'
      # Named volume declared under the top-level `volumes` key.
      - 'doctr-cache:/root/.cache/doctr'
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU for this container.
            - driver: 'nvidia'
              count: 1
              capabilities:
                - 'gpu'
    healthcheck:
      # Same probe as paddle-ocr-gpu, but with a 180s start period
      # instead of 60s.
      test:
        - 'CMD'
        - 'python'
        - '-c'
        - "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"
      interval: '30s'
      timeout: '10s'
      retries: 3
      start_period: '180s'
# Named volumes mounted by the OCR services (paddlex-cache at /root/.paddlex,
# doctr-cache at /root/.cache/doctr). Each one sets an explicit `name`, so the
# volume is created under that exact name.
volumes:
  doctr-cache:
    name: 'doctr-model-cache'
  paddlex-cache:
    name: 'paddlex-model-cache'