diff --git a/apply_content.py b/apply_content.py index ab963c2..8389a4b 100644 --- a/apply_content.py +++ b/apply_content.py @@ -32,7 +32,7 @@ def md_to_html_para(text): # Italic text = re.sub(r'\*([^*]+)\*', r'\1', text) # Inline code - text = re.sub(r'`([^`]+)`', r'\1', text) + text = re.sub(r'`([^`]+)`', r'\1', text) return text def extract_table_title(lines, current_index): @@ -104,9 +104,10 @@ def parse_md_to_html_blocks(md_content): fig_file = f'figures/figura_{figure_counter}.png' fig_path = os.path.join(BASE_DIR, 'thesis_output', fig_file) - # Create figure with MsoCaption class and bookmark for Word cross-reference + # Create figure with MsoCaption class and proper Word SEQ field for cross-reference + # Format: "Figura X." in bold, title in italic (per UNIR guidelines) bookmark_id = f"_TocFigura{figure_counter}" - html_blocks.append(f'''

Figura {figure_counter}. {fig_title}

''') + html_blocks.append(f'''

Figura {figure_counter}. {fig_title}

''') if os.path.exists(fig_path): # Use actual image with proper Word-compatible format (max 400px width, 500px height to fit page) @@ -131,7 +132,7 @@ def parse_md_to_html_blocks(md_content): code = '\n'.join(code_lines) # Escape HTML entities in code code = code.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;') - html_blocks.append(f'

{code}

') + html_blocks.append(f'

{code}

') i += 1 continue @@ -186,25 +187,30 @@ def parse_md_to_html_blocks(md_content): table_source = lines[i].replace('*', '').replace('Fuente:', '').strip() i += 1 - # Add table title with MsoCaption class and bookmark for Word cross-reference + # Add table title with MsoCaption class and proper Word SEQ field for cross-reference + # Format: "Tabla X." in bold, title in italic (per UNIR guidelines) bookmark_id = f"_TocTabla{table_counter}" if table_title: clean_title = table_title.replace(f"Tabla {table_counter}.", "").strip() - html_blocks.append(f'

Tabla {table_counter}. {clean_title}

') else: - html_blocks.append(f'

Tabla {table_counter}. Tabla de datos.

') + clean_title = "Tabla de datos." + html_blocks.append(f'''

Tabla {table_counter}. {clean_title}

''') - # Build table HTML - table_html = '' + # Build table HTML with APA style (horizontal lines only, no vertical) + table_html = '
' for j, tline in enumerate(table_lines): cells = [c.strip() for c in tline.split('|')[1:-1]] table_html += '' for cell in cells: if j == 0: - # Header row - table_html += f'' + # Header row: top and bottom border, bold text + table_html += f'' + elif j == len(table_lines) - 1: + # Last row: bottom border only + table_html += f'' else: - table_html += f'' + # Middle rows: no borders + table_html += f'' table_html += '' table_html += '

{md_to_html_para(cell)}

{md_to_html_para(cell)}

{md_to_html_para(cell)}

{md_to_html_para(cell)}

{md_to_html_para(cell)}

' html_blocks.append(table_html) diff --git a/docs/02_contexto_estado_arte.md b/docs/02_contexto_estado_arte.md index 6908a9b..74aff2c 100644 --- a/docs/02_contexto_estado_arte.md +++ b/docs/02_contexto_estado_arte.md @@ -165,38 +165,16 @@ Los métodos de HPO incluyen: La combinación Ray Tune + Optuna permite búsquedas eficientes en espacios de alta dimensionalidad. ```mermaid -flowchart TD - subgraph "Ray Tune" - A["Espacio de
búsqueda"] - B["Scheduler
(gestión de trials)"] - C["Trial 1"] - D["Trial 2"] - E["Trial N"] - end - - subgraph "Optuna (TPE)" - F["Modelo probabilístico
de la función objetivo"] - G["Sugiere nueva
configuración"] - end - - subgraph "Evaluación" - H["Ejecuta modelo OCR
con config"] - I["Calcula métricas
(CER, WER)"] - end - - A --> B - B --> C & D & E - C & D & E --> H - H --> I - I -->|"Resultados"| F - F --> G - G -->|"Nueva config"| B - - style A fill:#fff3e0 - style I fill:#e8f5e9 +flowchart LR + A["Espacio de
búsqueda"] --> B["Ray Tune
Scheduler"] + B --> C["Trials
paralelos"] + C --> D["Evaluación
OCR"] + D --> E["Métricas
CER/WER"] + E --> F["Optuna
TPE"] + F -->|"Nueva config"| B ``` -*Figura 2. Arquitectura de optimización de hiperparámetros con Ray Tune y Optuna.* +*Figura 2. Ciclo de optimización de hiperparámetros con Ray Tune y Optuna.* #### HPO en Sistemas OCR diff --git a/docs/07_anexo_a.md b/docs/07_anexo_a.md index cb99fde..6bc9ea3 100644 --- a/docs/07_anexo_a.md +++ b/docs/07_anexo_a.md @@ -15,19 +15,29 @@ El repositorio incluye: ## A.2 Estructura del Repositorio +```mermaid +flowchart LR + root["MastersThesis/"] --> docs["docs/"] + root --> src["src/"] + root --> results["results/"] + root --> instructions["instructions/"] + root --> readme["README.md"] + + src --> nb1["paddle_ocr_fine_tune_unir_raytune.ipynb"] + src --> py1["paddle_ocr_tuning.py"] + src --> py2["dataset_manager.py"] + src --> nb2["prepare_dataset.ipynb"] + src --> csv["raytune_results_*.csv"] ``` -MastersThesis/ -├── docs/ # Capítulos de la tesis en Markdown -├── src/ -│ ├── paddle_ocr_fine_tune_unir_raytune.ipynb # Experimento principal -│ ├── paddle_ocr_tuning.py # Script de evaluación CLI -│ ├── dataset_manager.py # Clase ImageTextDataset -│ ├── prepare_dataset.ipynb # Preparación del dataset -│ └── raytune_paddle_subproc_results_*.csv # Resultados de 64 trials -├── results/ # Resultados de benchmarks -├── instructions/ # Instrucciones y plantilla UNIR -└── README.md -``` + +*Figura 8. 
Estructura del repositorio del proyecto.* + +**Descripción de componentes:** + +- **docs/**: Capítulos de la tesis en Markdown +- **src/**: Código fuente (notebooks y scripts) +- **results/**: Resultados de benchmarks en CSV +- **instructions/**: Instrucciones y plantilla UNIR ## A.3 Requisitos de Software diff --git a/generate_mermaid_figures.py b/generate_mermaid_figures.py index 4064dcb..67008b0 100644 --- a/generate_mermaid_figures.py +++ b/generate_mermaid_figures.py @@ -19,6 +19,7 @@ def extract_mermaid_diagrams(): '02_contexto_estado_arte.md', '03_objetivos_metodologia.md', '04_desarrollo_especifico.md', + '07_anexo_a.md', ] for md_file in md_files: diff --git a/thesis_output/figures/figura_2.png b/thesis_output/figures/figura_2.png index 25ace56..c12033c 100644 Binary files a/thesis_output/figures/figura_2.png and b/thesis_output/figures/figura_2.png differ diff --git a/thesis_output/figures/figura_8.png b/thesis_output/figures/figura_8.png new file mode 100644 index 0000000..79ef9d1 Binary files /dev/null and b/thesis_output/figures/figura_8.png differ diff --git a/thesis_output/figures/figures_manifest.json b/thesis_output/figures/figures_manifest.json index 2d19865..7ef7170 100644 --- a/thesis_output/figures/figures_manifest.json +++ b/thesis_output/figures/figures_manifest.json @@ -33,5 +33,10 @@ "file": "figura_7.png", "title": "Comparación Baseline vs Optimizado (24 páginas)", "index": 7 + }, + { + "file": "figura_8.png", + "title": "Diagrama de 07_anexo_a.md", + "index": 8 } ] \ No newline at end of file diff --git a/thesis_output/plantilla_individual.htm b/thesis_output/plantilla_individual.htm index 6e0b672..1461cdd 100644 Binary files a/thesis_output/plantilla_individual.htm and b/thesis_output/plantilla_individual.htm differ