diff --git a/apply_content.py b/apply_content.py
index ab963c2..8389a4b 100644
--- a/apply_content.py
+++ b/apply_content.py
@@ -32,7 +32,7 @@ def md_to_html_para(text):
# Italic
text = re.sub(r'\*([^*]+)\*', r'\1', text)
# Inline code
- text = re.sub(r'`([^`]+)`', r'\1', text)
+ text = re.sub(r'`([^`]+)`', r'\1', text)
return text
def extract_table_title(lines, current_index):
@@ -104,9 +104,10 @@ def parse_md_to_html_blocks(md_content):
fig_file = f'figures/figura_{figure_counter}.png'
fig_path = os.path.join(BASE_DIR, 'thesis_output', fig_file)
- # Create figure with MsoCaption class and bookmark for Word cross-reference
+ # Create figure with MsoCaption class and proper Word SEQ field for cross-reference
+ # Format: "Figura X." in bold, title in italic (per UNIR guidelines)
bookmark_id = f"_TocFigura{figure_counter}"
- html_blocks.append(f'''
Figura {figure_counter}. {fig_title}
''')
+ html_blocks.append(f'''Figura {figure_counter}. {fig_title}
''')
if os.path.exists(fig_path):
# Use actual image with proper Word-compatible format (max 400px width, 500px height to fit page)
@@ -131,7 +132,7 @@ def parse_md_to_html_blocks(md_content):
code = '\n'.join(code_lines)
# Escape HTML entities in code
code = code.replace('&', '&').replace('<', '<').replace('>', '>')
- html_blocks.append(f'{code}
')
+ html_blocks.append(f'{code}
')
i += 1
continue
@@ -186,25 +187,30 @@ def parse_md_to_html_blocks(md_content):
table_source = lines[i].replace('*', '').replace('Fuente:', '').strip()
i += 1
- # Add table title with MsoCaption class and bookmark for Word cross-reference
+ # Add table title with MsoCaption class and proper Word SEQ field for cross-reference
+ # Format: "Tabla X." in bold, title in italic (per UNIR guidelines)
bookmark_id = f"_TocTabla{table_counter}"
if table_title:
clean_title = table_title.replace(f"Tabla {table_counter}.", "").strip()
- html_blocks.append(f'Tabla {table_counter}. {clean_title}
')
else:
- html_blocks.append(f'Tabla {table_counter}. Tabla de datos.
')
+ clean_title = "Tabla de datos."
+ html_blocks.append(f'''Tabla {table_counter}. {clean_title}
''')
- # Build table HTML
- table_html = ''
+ # Build table HTML with APA style (horizontal lines only, no vertical)
+ table_html = ''
for j, tline in enumerate(table_lines):
cells = [c.strip() for c in tline.split('|')[1:-1]]
table_html += ''
for cell in cells:
if j == 0:
- # Header row
- table_html += f'{md_to_html_para(cell)} | '
+ # Header row: top and bottom border, bold text
+ table_html += f'{md_to_html_para(cell)} | '
+ elif j == len(table_lines) - 1:
+ # Last row: bottom border only
+ table_html += f'{md_to_html_para(cell)} | '
else:
- table_html += f'{md_to_html_para(cell)} | '
+ # Middle rows: no borders
+ table_html += f'{md_to_html_para(cell)} | '
table_html += '
'
table_html += '
'
html_blocks.append(table_html)
diff --git a/docs/02_contexto_estado_arte.md b/docs/02_contexto_estado_arte.md
index 6908a9b..74aff2c 100644
--- a/docs/02_contexto_estado_arte.md
+++ b/docs/02_contexto_estado_arte.md
@@ -165,38 +165,16 @@ Los métodos de HPO incluyen:
La combinación Ray Tune + Optuna permite búsquedas eficientes en espacios de alta dimensionalidad.
```mermaid
-flowchart TD
- subgraph "Ray Tune"
- A["Espacio de
búsqueda"]
- B["Scheduler
(gestión de trials)"]
- C["Trial 1"]
- D["Trial 2"]
- E["Trial N"]
- end
-
- subgraph "Optuna (TPE)"
- F["Modelo probabilístico
de la función objetivo"]
- G["Sugiere nueva
configuración"]
- end
-
- subgraph "Evaluación"
- H["Ejecuta modelo OCR
con config"]
- I["Calcula métricas
(CER, WER)"]
- end
-
- A --> B
- B --> C & D & E
- C & D & E --> H
- H --> I
- I -->|"Resultados"| F
- F --> G
- G -->|"Nueva config"| B
-
- style A fill:#fff3e0
- style I fill:#e8f5e9
+flowchart LR
+ A["Espacio de
búsqueda"] --> B["Ray Tune
Scheduler"]
+ B --> C["Trials
paralelos"]
+ C --> D["Evaluación
OCR"]
+ D --> E["Métricas
CER/WER"]
+ E --> F["Optuna
TPE"]
+ F -->|"Nueva config"| B
```
-*Figura 2. Arquitectura de optimización de hiperparámetros con Ray Tune y Optuna.*
+*Figura 2. Ciclo de optimización de hiperparámetros con Ray Tune y Optuna.*
#### HPO en Sistemas OCR
diff --git a/docs/07_anexo_a.md b/docs/07_anexo_a.md
index cb99fde..6bc9ea3 100644
--- a/docs/07_anexo_a.md
+++ b/docs/07_anexo_a.md
@@ -15,19 +15,29 @@ El repositorio incluye:
## A.2 Estructura del Repositorio
+```mermaid
+flowchart LR
+ root["MastersThesis/"] --> docs["docs/"]
+ root --> src["src/"]
+ root --> results["results/"]
+ root --> instructions["instructions/"]
+ root --> readme["README.md"]
+
+ src --> nb1["paddle_ocr_fine_tune_unir_raytune.ipynb"]
+ src --> py1["paddle_ocr_tuning.py"]
+ src --> py2["dataset_manager.py"]
+ src --> nb2["prepare_dataset.ipynb"]
+ src --> csv["raytune_paddle_subproc_results_*.csv"]
```
-MastersThesis/
-├── docs/ # Capítulos de la tesis en Markdown
-├── src/
-│ ├── paddle_ocr_fine_tune_unir_raytune.ipynb # Experimento principal
-│ ├── paddle_ocr_tuning.py # Script de evaluación CLI
-│ ├── dataset_manager.py # Clase ImageTextDataset
-│ ├── prepare_dataset.ipynb # Preparación del dataset
-│ └── raytune_paddle_subproc_results_*.csv # Resultados de 64 trials
-├── results/ # Resultados de benchmarks
-├── instructions/ # Instrucciones y plantilla UNIR
-└── README.md
-```
+
+*Figura 8. Estructura del repositorio del proyecto.*
+
+**Descripción de componentes:**
+
+- **docs/**: Capítulos de la tesis en Markdown
+- **src/**: Código fuente (notebooks y scripts)
+- **results/**: Resultados de benchmarks en CSV
+- **instructions/**: Instrucciones y plantilla UNIR
## A.3 Requisitos de Software
diff --git a/generate_mermaid_figures.py b/generate_mermaid_figures.py
index 4064dcb..67008b0 100644
--- a/generate_mermaid_figures.py
+++ b/generate_mermaid_figures.py
@@ -19,6 +19,7 @@ def extract_mermaid_diagrams():
'02_contexto_estado_arte.md',
'03_objetivos_metodologia.md',
'04_desarrollo_especifico.md',
+ '07_anexo_a.md',
]
for md_file in md_files:
diff --git a/thesis_output/figures/figura_2.png b/thesis_output/figures/figura_2.png
index 25ace56..c12033c 100644
Binary files a/thesis_output/figures/figura_2.png and b/thesis_output/figures/figura_2.png differ
diff --git a/thesis_output/figures/figura_8.png b/thesis_output/figures/figura_8.png
new file mode 100644
index 0000000..79ef9d1
Binary files /dev/null and b/thesis_output/figures/figura_8.png differ
diff --git a/thesis_output/figures/figures_manifest.json b/thesis_output/figures/figures_manifest.json
index 2d19865..7ef7170 100644
--- a/thesis_output/figures/figures_manifest.json
+++ b/thesis_output/figures/figures_manifest.json
@@ -33,5 +33,10 @@
"file": "figura_7.png",
"title": "Comparación Baseline vs Optimizado (24 páginas)",
"index": 7
+ },
+ {
+ "file": "figura_8.png",
+ "title": "Estructura del repositorio del proyecto",
+ "index": 8
}
]
\ No newline at end of file
diff --git a/thesis_output/plantilla_individual.htm b/thesis_output/plantilla_individual.htm
index 6e0b672..1461cdd 100644
Binary files a/thesis_output/plantilla_individual.htm and b/thesis_output/plantilla_individual.htm differ