Files
MastersThesis/generate_mermaid_figures.py
sergio 0089b34cb3
Some checks failed
build_docker / essential (push) Successful in 0s
build_docker / build_paddle_ocr (push) Successful in 4m57s
build_docker / build_raytune (push) Has been cancelled
build_docker / build_easyocr_gpu (push) Has been cancelled
build_docker / build_doctr (push) Has been cancelled
build_docker / build_doctr_gpu (push) Has been cancelled
build_docker / build_paddle_ocr_gpu (push) Has been cancelled
build_docker / build_easyocr (push) Has been cancelled
Documentation review and data consistency.
2026-01-24 15:53:34 +01:00

165 lines
6.4 KiB
Python

#!/usr/bin/env python3
"""Extract Mermaid diagrams from markdown files and convert to PNG images."""
import os
import re
import subprocess
import json
import cairosvg
# Directory containing this script; every other path below is resolved against it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# Directory holding the markdown sources that are scanned for ```mermaid blocks.
DOCS_DIR = os.path.join(BASE_DIR, 'docs')
# Destination for the rendered PNG figures, the temporary files and the manifest.
OUTPUT_DIR = os.path.join(BASE_DIR, 'thesis_output/figures')
# mmdc executable taken from the project-local node_modules directory.
MMDC = os.path.join(BASE_DIR, 'node_modules/.bin/mmdc')
# Configuration file handed to mmdc through its -c option.
CONFIG_FILE = os.path.join(BASE_DIR, 'mermaid.config.json')
# Optional Puppeteer configuration; it is only passed to mmdc when the file exists.
# NOTE(review): 'tem/scripts' looks like it could be a typo — confirm the directory name.
PUPPETEER_CONFIG = os.path.join(BASE_DIR, 'tem/scripts/puppeteer_config.json')
# Light blue color applied to the fill and stroke of bar-chart bars.
BAR_COLOR = '#0098CD'
# Default bar colors that need to be replaced (varies by theme). Both letter
# cases are listed because the replacement is a case-sensitive string match.
DEFAULT_BAR_COLORS = ['#ECECFF', '#FFF4DD', '#ececff', '#fff4dd']
# Markdown files scanned by default, in the order that determines figure numbering.
_DEFAULT_MD_FILES = (
    '02_contexto_estado_arte.md',
    '03_objetivos_metodologia.md',
    '04_desarrollo_especifico.md',
    '07_anexo_a.md',
)

# Fenced ```mermaid block; DOTALL lets the captured body span several lines.
_MERMAID_BLOCK_RE = re.compile(r'```mermaid\n(.*?)```', re.DOTALL)
# title: "Something" or title: 'Something'. The backreference forces the
# closing quote to be the same character as the opening one, so a title may
# contain the other kind of quote.
_QUOTED_TITLE_RE = re.compile(r'\btitle:[ \t]*(["\'])(.*?)\1')
# title: Something (unquoted) -> everything up to the end of that line.
_PLAIN_TITLE_RE = re.compile(r'\btitle:[ \t]*([^\n]+)')


def _extract_title(mermaid_code):
    """Return the title declared in *mermaid_code*, or None if there is none."""
    quoted = _QUOTED_TITLE_RE.search(mermaid_code)
    if quoted:
        title = quoted.group(2)
    else:
        plain = _PLAIN_TITLE_RE.search(mermaid_code)
        title = plain.group(1) if plain else ''
    # An empty or whitespace-only title is treated as "no title".
    return title.strip() or None


def extract_mermaid_diagrams(docs_dir=None, md_files=None):
    """Extract all mermaid diagrams from markdown files.

    Args:
        docs_dir: Directory containing the markdown files. Defaults to
            the module-level DOCS_DIR.
        md_files: Iterable of markdown file names (relative to *docs_dir*)
            to scan, in order. Defaults to the thesis chapter list.

    Returns:
        A list of dicts, one per diagram, in discovery order, with keys:
        'source' (markdown file name), 'code' (stripped diagram source),
        'title' (declared title, or "Diagrama N" when none is declared) and
        'index' (1-based position across all scanned files).
    """
    if docs_dir is None:
        docs_dir = DOCS_DIR
    if md_files is None:
        md_files = _DEFAULT_MD_FILES

    diagrams = []
    for md_file in md_files:
        filepath = os.path.join(docs_dir, md_file)
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                content = f.read()
        except FileNotFoundError:
            # A missing file shifts the numbering of every later figure, so
            # report it instead of skipping silently.
            print(f"⚠ Skipping missing file: {filepath}")
            continue

        for mermaid_code in _MERMAID_BLOCK_RE.findall(content):
            index = len(diagrams) + 1
            diagrams.append({
                'source': md_file,
                'code': mermaid_code.strip(),
                'title': _extract_title(mermaid_code) or f"Diagrama {index}",
                'index': index,
            })
    return diagrams
def _run_mmdc(input_file, output_file, extra_args=()):
    """Run mmdc on *input_file* writing *output_file*; return the CompletedProcess."""
    cmd = [MMDC, '-i', input_file, '-o', output_file,
           '-b', 'white', '-w', '1600', *extra_args, '-c', CONFIG_FILE]
    if os.path.exists(PUPPETEER_CONFIG):
        cmd += ['--puppeteerConfigFile', PUPPETEER_CONFIG]
    return subprocess.run(cmd, capture_output=True, text=True, timeout=60)


def _recolor_bars(svg_content):
    """Replace the default bar fill/stroke colors in *svg_content* with BAR_COLOR."""
    for default_color in DEFAULT_BAR_COLORS:
        for attr in ('fill', 'stroke'):
            # Cover both attribute quoting styles.
            for quote in ('"', "'"):
                svg_content = svg_content.replace(
                    f'{attr}={quote}{default_color}{quote}',
                    f'{attr}={quote}{BAR_COLOR}{quote}',
                )
    return svg_content


def convert_to_png(diagrams):
    """Convert mermaid diagrams to PNG using mmdc.

    Bar charts (diagrams containing 'xychart-beta') are rendered to SVG first,
    recolored, and then rasterized with cairosvg; every other diagram is
    rendered to PNG directly by mmdc.

    Args:
        diagrams: Iterable of dicts with at least the keys 'index', 'code'
            and 'title'.

    Returns:
        A list of dicts ('file', 'title', 'index'), one per figure that was
        generated successfully. Failures are printed and skipped.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    generated = []

    for diagram in diagrams:
        index = diagram['index']
        png_name = f'figura_{index}.png'
        temp_file = os.path.join(OUTPUT_DIR, f'temp_{index}.mmd')
        svg_file = os.path.join(OUTPUT_DIR, f'temp_{index}.svg')
        output_file = os.path.join(OUTPUT_DIR, png_name)

        # Check if this is a bar chart (xychart-beta)
        is_bar_chart = 'xychart-beta' in diagram['code']

        # Write mermaid code to temp file
        with open(temp_file, 'w', encoding='utf-8') as f:
            f.write(diagram['code'])

        try:
            if is_bar_chart:
                # For bar charts: generate SVG, fix colors, convert to PNG
                result = _run_mmdc(temp_file, svg_file)
                rendered = result.returncode == 0 and os.path.exists(svg_file)
                if rendered:
                    with open(svg_file, 'r', encoding='utf-8') as f:
                        svg_content = _recolor_bars(f.read())
                    # scale=3 rasterizes at high resolution
                    cairosvg.svg2png(bytestring=svg_content.encode('utf-8'),
                                     write_to=output_file,
                                     scale=3)
            else:
                # For other diagrams: direct PNG generation
                result = _run_mmdc(temp_file, output_file, ('-s', '3'))
                rendered = result.returncode == 0

            # Require a clean mmdc exit as well as the file: a PNG left over
            # from an earlier run must not be reported as freshly generated.
            if rendered and os.path.exists(output_file):
                print(f"✓ Generated: {png_name} - {diagram['title']}")
                generated.append({
                    'file': png_name,
                    'title': diagram['title'],
                    'index': index,
                })
            else:
                print(f"✗ Failed: {png_name} - {result.stderr}")
        except subprocess.TimeoutExpired:
            print(f"✗ Timeout: {png_name}")
        except Exception as e:
            # Per-diagram boundary: report and continue with the next figure.
            print(f"✗ Error: {png_name} - {e}")
        finally:
            # Clean up temp files even when rendering or conversion failed.
            for leftover in (temp_file, svg_file):
                if os.path.exists(leftover):
                    os.remove(leftover)

    return generated
def main():
    """Run the full pipeline: extract diagrams, render them, write the manifest."""
    print("Extracting Mermaid diagrams from markdown files...")
    found = extract_mermaid_diagrams()
    print(f"Found {len(found)} diagrams\n")

    print("Converting to PNG images...")
    figures = convert_to_png(found)
    print(f"\n✓ Generated {len(figures)} figures in {OUTPUT_DIR}")

    # Save manifest for apply_content.py to use
    manifest_path = os.path.join(OUTPUT_DIR, 'figures_manifest.json')
    serialized = json.dumps(figures, indent=2, ensure_ascii=False)
    with open(manifest_path, 'w', encoding='utf-8') as manifest:
        manifest.write(serialized)
    print(f"✓ Saved manifest to {manifest_path}")


if __name__ == '__main__':
    main()