Documentation review. (#5)

2026-01-20 14:33:46 +00:00
parent c7ed7b2b9c
commit 9ee2490097
56 changed files with 2182 additions and 945 deletions
--- a/apply_content.py
+++ b/apply_content.py
@@ -3,8 +3,10 @@

 import re
 import os
+import shutil
 from bs4 import BeautifulSoup, NavigableString
 from latex2mathml.converter import convert as latex_to_mathml
+from PIL import Image

 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 TEMPLATE_INPUT = os.path.join(BASE_DIR, 'instructions/plantilla_individual.htm')
@@ -120,13 +122,13 @@ def parse_md_to_html_blocks(md_content):
                mermaid_lines.append(lines[i])
                i += 1

-            # Try to extract title from mermaid content (YAML format: title: "...")
+            # Try to extract title from mermaid content (YAML format)
            mermaid_content = '\n'.join(mermaid_lines)
-            # Match YAML format: title: "Title" or title: 'Title'
+            # Match title with quotes: title: "Something" or title: 'Something'
            title_match = re.search(r'title:\s*["\']([^"\']+)["\']', mermaid_content)
            if not title_match:
-                # Fallback to non-YAML format: title "Title"
-                title_match = re.search(r'title\s+["\']?([^"\'"\n]+)["\']?', mermaid_content)
+                # Match title without quotes: title: Something
+                title_match = re.search(r'title:\s*([^"\'\n]+)', mermaid_content)
            if title_match:
                fig_title = title_match.group(1).strip()
            else:
@@ -143,8 +145,24 @@ def parse_md_to_html_blocks(md_content):
            html_blocks.append(f'''<a name="{bookmark_id}"></a><p class=MsoCaption style="text-align:center"><b><span lang=ES style="font-size:12.0pt;line-height:150%">Figura <!--[if supportFields]><span style='mso-element:field-begin'></span> SEQ Figura \\* ARABIC <span style='mso-element:field-separator'></span><![endif]-->{figure_counter}<!--[if supportFields]><span style='mso-element:field-end'></span><![endif]-->.</span></b><span lang=ES style="font-size:12.0pt;line-height:150%"> </span><i><span lang=ES style="font-size:12.0pt;line-height:150%">{fig_title}</span></i></p>''')

            if os.path.exists(fig_path):
-                # Use Word-compatible width in cm (A4 text area is ~16cm wide, use ~12cm max)
-                html_blocks.append(f'''<p class=MsoNormal style="text-align:center"><span lang=ES><img style="width:12cm;max-width:100%" src="{fig_file}" alt="{fig_title}"/></span></p>''')
+                # Read actual image dimensions and scale to fit page width
+                img = Image.open(fig_path)
+                orig_w, orig_h = img.size
+
+                # Scale to fit max width of 566px (15cm at 96dpi) while preserving aspect ratio
+                max_width = 566
+                if orig_w > max_width:
+                    scale = max_width / orig_w
+                    new_w = max_width
+                    new_h = int(orig_h * scale)
+                else:
+                    new_w, new_h = orig_w, orig_h
+
+                # Convert to pt (1px at 96dpi = 0.75pt)
+                w_pt = new_w * 0.75
+                h_pt = new_h * 0.75
+
+                html_blocks.append(f'''<p class=MsoNormal style="text-align:center"><span lang=ES><img width="{new_w}" height="{new_h}" style="width:{w_pt}pt;height:{h_pt}pt;display:block;margin:0 auto" src="{fig_file}" alt="{fig_title}"/></span></p>''')
            else:
                # Fallback to placeholder
                html_blocks.append(f'''<p class=MsoNormal style="text-align:center;border:1px dashed #999;padding:20px;margin:10px 40px;background:#f9f9f9"><span lang=ES style="color:#666">[Insertar diagrama Mermaid aquí]</span></p>''')
@@ -165,7 +183,9 @@ def parse_md_to_html_blocks(md_content):
            code = '\n'.join(code_lines)
            # Escape HTML entities in code
            code = code.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
-            html_blocks.append(f'<p class=MsoNormal style="margin-left:1cm"><span style="font-family:Consolas;font-size:9pt"><pre>{code}</pre></span></p>')
+            html_blocks.append(f'''<div style="background:#E6F4F9;border-top:solid #0098CD .5pt;border-bottom:solid #0098CD .5pt;padding:8pt 12pt;margin:6pt 0">
+<pre style="font-family:Consolas,monospace;font-size:9pt;color:#333333;margin:0;white-space:pre-wrap;word-wrap:break-word">{code}</pre>
+</div>''')
            i += 1
            continue

@@ -239,7 +259,8 @@ def parse_md_to_html_blocks(md_content):
            html_blocks.append(f'''<a name="{bookmark_id}"></a><p class=MsoCaption><b><span lang=ES style="font-size:12.0pt;line-height:150%">Tabla <!--[if supportFields]><span style='mso-element:field-begin'></span> SEQ Tabla \\* ARABIC <span style='mso-element:field-separator'></span><![endif]-->{table_counter}<!--[if supportFields]><span style='mso-element:field-end'></span><![endif]-->.</span></b><span lang=ES style="font-size:12.0pt;line-height:150%"> </span><i><span lang=ES style="font-size:12.0pt;line-height:150%">{clean_title}</span></i></p>''')

            # Build table HTML with APA style (horizontal lines only, no vertical)
-            table_html = '<table class=MsoTableGrid border=1 cellspacing=0 cellpadding=0 style="border-collapse:collapse;margin-left:auto;margin-right:auto;mso-table-style-name:\'Plain Table 1\'">'
+            # Wrap in centered div for Word compatibility
+            table_html = '<div align="center"><table class=MsoTableGrid border=1 cellspacing=0 cellpadding=0 align="center" style="border-collapse:collapse;margin-left:auto;margin-right:auto;mso-table-style-name:\'Plain Table 1\'">'
            for j, tline in enumerate(table_lines):
                cells = [c.strip() for c in tline.split('|')[1:-1]]
                table_html += '<tr>'
@@ -254,7 +275,7 @@ def parse_md_to_html_blocks(md_content):
                        # Middle rows: no borders
                        table_html += f'<td style="border:none;padding:5px"><p class=MsoNormal style="margin:0;text-align:center"><span lang=ES>{md_to_html_para(cell)}</span></p></td>'
                table_html += '</tr>'
-            table_html += '</table>'
+            table_html += '</table></div>'
            html_blocks.append(table_html)

            # Add source with proper template format
@@ -269,7 +290,7 @@ def parse_md_to_html_blocks(md_content):
            while i < len(lines) and lines[i].startswith('>'):
                quote_text += ' ' + lines[i][1:].strip()
                i += 1
-            html_blocks.append(f'<p class=MsoNormal style="margin-left:2cm;margin-right:1cm"><i><span lang=ES>{md_to_html_para(quote_text)}</span></i></p>')
+            html_blocks.append(f'<p class=MsoQuote><i><span lang=ES>{md_to_html_para(quote_text)}</span></i></p>')
            continue

        # Bullet list
@@ -640,6 +661,15 @@ def main():
    output_html = str(soup)
    write_file(TEMPLATE_OUTPUT, output_html)

+    # Copy template support files (header.htm, images, etc.)
+    support_files_src = os.path.join(BASE_DIR, 'instructions/plantilla_individual_files')
+    support_files_dst = os.path.join(BASE_DIR, 'thesis_output/plantilla_individual_files')
+    if os.path.exists(support_files_src):
+        if os.path.exists(support_files_dst):
+            shutil.rmtree(support_files_dst)
+        shutil.copytree(support_files_src, support_files_dst)
+        print(f"✓ Copied template support files")
+
    print(f"✓ Done! Modified: {TEMPLATE_OUTPUT}")
    print("\nTo convert to DOCX:")
    print("1. Open the .htm file in Microsoft Word")