links regeneration
Some checks failed
build_docker / essential (push) Successful in 0s
build_docker / build_paddle_ocr (push) Successful in 5m30s
build_docker / build_paddle_ocr_gpu (push) Successful in 22m0s
build_docker / build_easyocr (push) Successful in 18m14s
build_docker / build_doctr_gpu (push) Has been cancelled
build_docker / build_raytune (push) Has been cancelled
build_docker / build_easyocr_gpu (push) Has started running
build_docker / build_doctr (push) Has been cancelled
Some checks failed
build_docker / essential (push) Successful in 0s
build_docker / build_paddle_ocr (push) Successful in 5m30s
build_docker / build_paddle_ocr_gpu (push) Successful in 22m0s
build_docker / build_easyocr (push) Successful in 18m14s
build_docker / build_doctr_gpu (push) Has been cancelled
build_docker / build_raytune (push) Has been cancelled
build_docker / build_easyocr_gpu (push) Has started running
build_docker / build_doctr (push) Has been cancelled
This commit is contained in:
@@ -16,6 +16,10 @@ DOCS_DIR = os.path.join(BASE_DIR, 'docs')
|
||||
# Global counters for tables and figures
|
||||
table_counter = 0
|
||||
figure_counter = 0
|
||||
anexo_table_counter = 0
|
||||
anexo_figure_counter = 0
|
||||
# Global sequential counter for figure filenames (figura_1.png, figura_2.png, etc.)
|
||||
global_figure_index = 0
|
||||
|
||||
def read_file(path):
|
||||
try:
|
||||
@@ -99,7 +103,7 @@ def extract_figure_title_from_mermaid(lines, current_index):
|
||||
|
||||
def parse_md_to_html_blocks(md_content, is_anexo=False):
|
||||
"""Convert markdown content to HTML blocks with template styles."""
|
||||
global table_counter, figure_counter
|
||||
global table_counter, figure_counter, anexo_table_counter, anexo_figure_counter, global_figure_index
|
||||
|
||||
html_blocks = []
|
||||
lines = md_content.split('\n')
|
||||
@@ -115,7 +119,17 @@ def parse_md_to_html_blocks(md_content, is_anexo=False):
|
||||
|
||||
# Mermaid diagram - convert to figure with actual image
|
||||
if line.strip().startswith('```mermaid'):
|
||||
figure_counter += 1
|
||||
# Always increment global index for sequential filenames
|
||||
global_figure_index += 1
|
||||
|
||||
# Use Anexo-specific counter with "A" prefix for display, or global counter
|
||||
if is_anexo:
|
||||
anexo_figure_counter += 1
|
||||
fig_num = f"A{anexo_figure_counter}" # Display number: A1, A2, A3...
|
||||
else:
|
||||
figure_counter += 1
|
||||
fig_num = str(figure_counter) # Display number: 1, 2, 3...
|
||||
|
||||
mermaid_lines = []
|
||||
i += 1
|
||||
while i < len(lines) and not lines[i].strip() == '```':
|
||||
@@ -132,18 +146,22 @@ def parse_md_to_html_blocks(md_content, is_anexo=False):
|
||||
if title_match:
|
||||
fig_title = title_match.group(1).strip()
|
||||
else:
|
||||
fig_title = f"Diagrama {figure_counter}"
|
||||
fig_title = f"Diagrama {fig_num}"
|
||||
|
||||
# Check if the generated PNG exists
|
||||
fig_file = f'figures/figura_{figure_counter}.png'
|
||||
# Use global sequential index for filename (figura_1.png, figura_2.png, etc.)
|
||||
fig_file = f'figures/figura_{global_figure_index}.png'
|
||||
fig_path = os.path.join(BASE_DIR, 'thesis_output', fig_file)
|
||||
|
||||
# Create figure with MsoCaption class and proper Word SEQ field for cross-reference
|
||||
# Format: "Figura X." in bold, title in italic (per UNIR guidelines)
|
||||
# Word TOC looks for text with Caption style - anchor must be outside main caption text
|
||||
bookmark_id = f"_Ref_Fig{figure_counter}"
|
||||
bookmark_id = f"_Ref_Fig{fig_num}"
|
||||
# mso-pagination:keep-with-next ensures caption stays with figure image (correct MSO property)
|
||||
html_blocks.append(f'''<a name="{bookmark_id}"></a><p class=MsoCaption style="text-align:center;mso-pagination:keep-with-next"><b><span lang=ES style="font-size:12.0pt;line-height:150%">Figura <!--[if supportFields]><span style='mso-element:field-begin'></span> SEQ Figura \\* ARABIC <span style='mso-element:field-separator'></span><![endif]-->{figure_counter}<!--[if supportFields]><span style='mso-element:field-end'></span><![endif]-->.</span></b><span lang=ES style="font-size:12.0pt;line-height:150%"> </span><i><span lang=ES style="font-size:12.0pt;line-height:150%">{fig_title}</span></i></p>''')
|
||||
# For Anexo figures, use static text (no SEQ field) to prevent Word from overwriting A1, A2...
|
||||
if is_anexo:
|
||||
html_blocks.append(f'''<a name="{bookmark_id}"></a><p class=MsoCaption style="text-align:center;mso-pagination:keep-with-next"><b><span lang=ES style="font-size:12.0pt;line-height:150%">Figura {fig_num}.</span></b><span lang=ES style="font-size:12.0pt;line-height:150%"> </span><i><span lang=ES style="font-size:12.0pt;line-height:150%">{fig_title}</span></i></p>''')
|
||||
else:
|
||||
html_blocks.append(f'''<a name="{bookmark_id}"></a><p class=MsoCaption style="text-align:center;mso-pagination:keep-with-next"><b><span lang=ES style="font-size:12.0pt;line-height:150%">Figura <!--[if supportFields]><span style='mso-element:field-begin'></span> SEQ Figura \\* ARABIC <span style='mso-element:field-separator'></span><![endif]-->{fig_num}<!--[if supportFields]><span style='mso-element:field-end'></span><![endif]-->.</span></b><span lang=ES style="font-size:12.0pt;line-height:150%"> </span><i><span lang=ES style="font-size:12.0pt;line-height:150%">{fig_title}</span></i></p>''')
|
||||
|
||||
if os.path.exists(fig_path):
|
||||
# Read actual image dimensions and scale to fit page width
|
||||
@@ -216,7 +234,8 @@ def parse_md_to_html_blocks(md_content, is_anexo=False):
|
||||
# Headers - ## becomes h2, ### becomes h3
|
||||
if line.startswith('####'):
|
||||
text = line.lstrip('#').strip()
|
||||
html_blocks.append(f'<h4><span lang=ES>{text}</span></h4>')
|
||||
# Apply consistent styling like h2/h3, disable numbering for h4
|
||||
html_blocks.append(f'<h4 style="mso-list:none"><span lang=ES style="text-transform:none">{text}</span></h4>')
|
||||
i += 1
|
||||
continue
|
||||
elif line.startswith('###'):
|
||||
@@ -246,7 +265,13 @@ def parse_md_to_html_blocks(md_content, is_anexo=False):
|
||||
|
||||
# Table - check for table title pattern first
|
||||
if '|' in line and i + 1 < len(lines) and '---' in lines[i + 1]:
|
||||
table_counter += 1
|
||||
# Use Anexo-specific counter with "A" prefix, or global counter
|
||||
if is_anexo:
|
||||
anexo_table_counter += 1
|
||||
table_num = f"A{anexo_table_counter}"
|
||||
else:
|
||||
table_counter += 1
|
||||
table_num = str(table_counter)
|
||||
|
||||
# Check if previous line has table title (e.g., **Tabla 1.** *Title*)
|
||||
table_title = None
|
||||
@@ -281,7 +306,7 @@ def parse_md_to_html_blocks(md_content, is_anexo=False):
|
||||
# Add table title with MsoCaption class and proper Word SEQ field for cross-reference
|
||||
# Format: "Tabla X." in bold, title in italic (per UNIR guidelines)
|
||||
# Word TOC looks for text with Caption style - anchor must be outside main caption text
|
||||
bookmark_id = f"_Ref_Tab{table_counter}"
|
||||
bookmark_id = f"_Ref_Tab{table_num}"
|
||||
if table_title:
|
||||
# Remove any "Tabla X." or "Tabla AX." pattern from the title
|
||||
clean_title = re.sub(r'^Tabla\s+[A-Z]?\d+\.\s*', '', table_title).strip()
|
||||
@@ -291,7 +316,11 @@ def parse_md_to_html_blocks(md_content, is_anexo=False):
|
||||
else:
|
||||
clean_title = "Tabla de datos."
|
||||
# mso-pagination:keep-with-next ensures caption stays with table (correct MSO property)
|
||||
html_blocks.append(f'''<a name="{bookmark_id}"></a><p class=MsoCaption style="mso-pagination:keep-with-next"><b><span lang=ES style="font-size:12.0pt;line-height:150%">Tabla <!--[if supportFields]><span style='mso-element:field-begin'></span> SEQ Tabla \\* ARABIC <span style='mso-element:field-separator'></span><![endif]-->{table_counter}<!--[if supportFields]><span style='mso-element:field-end'></span><![endif]-->.</span></b><span lang=ES style="font-size:12.0pt;line-height:150%"> </span><i><span lang=ES style="font-size:12.0pt;line-height:150%">{clean_title}</span></i></p>''')
|
||||
# For Anexo tables, use static text (no SEQ field) to prevent Word from overwriting A1, A2...
|
||||
if is_anexo:
|
||||
html_blocks.append(f'''<a name="{bookmark_id}"></a><p class=MsoCaption style="mso-pagination:keep-with-next"><b><span lang=ES style="font-size:12.0pt;line-height:150%">Tabla {table_num}.</span></b><span lang=ES style="font-size:12.0pt;line-height:150%"> </span><i><span lang=ES style="font-size:12.0pt;line-height:150%">{clean_title}</span></i></p>''')
|
||||
else:
|
||||
html_blocks.append(f'''<a name="{bookmark_id}"></a><p class=MsoCaption style="mso-pagination:keep-with-next"><b><span lang=ES style="font-size:12.0pt;line-height:150%">Tabla <!--[if supportFields]><span style='mso-element:field-begin'></span> SEQ Tabla \\* ARABIC <span style='mso-element:field-separator'></span><![endif]-->{table_num}<!--[if supportFields]><span style='mso-element:field-end'></span><![endif]-->.</span></b><span lang=ES style="font-size:12.0pt;line-height:150%"> </span><i><span lang=ES style="font-size:12.0pt;line-height:150%">{clean_title}</span></i></p>''')
|
||||
|
||||
# Build table HTML with APA style (horizontal lines only, no vertical)
|
||||
table_html = '<div align="center"><table class=MsoTableGrid border=1 cellspacing=0 cellpadding=0 align="center" style="border-collapse:collapse;margin-left:auto;margin-right:auto;mso-table-style-name:\'Plain Table 1\'">'
|
||||
@@ -445,25 +474,25 @@ def extract_resumen_parts(resumen_content):
|
||||
spanish_keywords = ''
|
||||
if '**Palabras clave:**' in spanish_part:
|
||||
text_part, kw_part = spanish_part.split('**Palabras clave:**')
|
||||
spanish_text = text_part.replace('# Resumen', '').strip()
|
||||
spanish_keywords = kw_part.strip()
|
||||
spanish_text = md_to_html_para(text_part.replace('# Resumen', '').strip())
|
||||
spanish_keywords = md_to_html_para(kw_part.strip())
|
||||
else:
|
||||
spanish_text = spanish_part.replace('# Resumen', '').strip()
|
||||
spanish_text = md_to_html_para(spanish_part.replace('# Resumen', '').strip())
|
||||
|
||||
# Extract English content
|
||||
english_text = ''
|
||||
english_keywords = ''
|
||||
if '**Keywords:**' in english_part:
|
||||
text_part, kw_part = english_part.split('**Keywords:**')
|
||||
english_text = text_part.replace('# Abstract', '').strip()
|
||||
english_keywords = kw_part.strip()
|
||||
english_text = md_to_html_para(text_part.replace('# Abstract', '').strip())
|
||||
english_keywords = md_to_html_para(kw_part.strip())
|
||||
else:
|
||||
english_text = english_part.replace('# Abstract', '').strip()
|
||||
english_text = md_to_html_para(english_part.replace('# Abstract', '').strip())
|
||||
|
||||
return spanish_text, spanish_keywords, english_text, english_keywords
|
||||
|
||||
def main():
|
||||
global table_counter, figure_counter
|
||||
global table_counter, figure_counter, anexo_table_counter, anexo_figure_counter
|
||||
|
||||
print("Reading template...")
|
||||
html_content = read_file(TEMPLATE_INPUT)
|
||||
@@ -692,7 +721,7 @@ def main():
|
||||
insert_point.insert_after(new_elem)
|
||||
print(f" ✓ Replaced content")
|
||||
|
||||
print(f"\nSummary: {table_counter} tables, {figure_counter} figures processed")
|
||||
print(f"\nSummary: {table_counter} tables + {anexo_table_counter} Anexo tables, {figure_counter} figures + {anexo_figure_counter} Anexo figures processed")
|
||||
|
||||
print("Saving modified template...")
|
||||
output_html = str(soup)
|
||||
|
||||
Reference in New Issue
Block a user