#!/usr/bin/env python3
# Hulot converter: text/x-tex→text/html
# © Reuben Thomas 2002-2025

import os
from os.path import basename, dirname, join
import sys
import tempfile
import re
import subprocess

def strip_tex_suffix(name):
    """Return `name` without a trailing '.tex' extension, if it has one."""
    # FIXME: Better way to parse filenames?
    suffix = '.tex'
    if name.endswith(suffix):
        return name[:-len(suffix)]
    return name


def add_title_heading(text):
    """Workaround for poems: give a heading to a document that has none.

    If `text` contains no H1 element, take the contents of its TITLE element
    and reinject them as an H1 immediately after the opening BODY tag. A
    title containing '/tmp' is taken to be just the file name, not a real
    title, and is not used. Returns the (possibly unchanged) HTML.
    """
    if re.search('<H1[^>]*>', text, flags=re.IGNORECASE):
        return text
    m = re.search('<TITLE[^>]*>(.*)</TITLE>', text, flags=re.DOTALL | re.IGNORECASE)
    if m is None:
        return text
    title = m.group(1)
    if '/tmp' in title:
        return text
    # Use a function as the replacement so that backslashes in the title are
    # inserted literally rather than parsed as escapes or group references.
    return re.sub('(<BODY[^>]*>)',
                  lambda body: body.group(1) + '<H1>' + title + '</H1>',
                  text, flags=re.IGNORECASE)


def run_ignoring_failure(cmd, **kwargs):
    """Run `cmd`, carrying on regardless of its exit status.

    A command that cannot be started at all is reported on stderr and
    otherwise ignored too, as a shell would do in a ';'-separated list.
    """
    try:
        subprocess.call(cmd, **kwargs)
    except OSError as e:
        print(f'{cmd[0]}: {e}', file=sys.stderr)


def convert(tex_path):
    """Convert the LaTeX file `tex_path` to HTML and return it as a string.

    The work is done in a temporary directory: the source is copied there,
    latex-mk is run on it, and then HeVeA produces the HTML. The standard
    output of both tools is collected in a file called 'log' in that
    directory. Raises subprocess.CalledProcessError if HeVeA fails.
    """
    # The commands run with the temporary directory as their working
    # directory, so a relative path must be made absolute first.
    tex_path = os.path.abspath(tex_path)
    filebase = strip_tex_suffix(basename(tex_path))
    src_dir = dirname(tex_path)
    with tempfile.TemporaryDirectory() as tempdir:
        # FIXME: Move customization of special HeVeA files into web.pl
        # FIXME: Have per-user path configured somewhere
        tex_env = dict(os.environ, BIBINPUTS=f'{src_dir}:', TEXINPUTS=f'{src_dir}:')
        # Commands are given as argument lists, not a shell string, so that
        # unusual characters in the file name cannot be interpreted by a shell.
        with open(join(tempdir, 'log'), 'wb') as log:
            # Only HeVeA's exit status is checked: HeVeA is also given the
            # source directory via -I, and may succeed even if the earlier
            # steps did not.
            run_ignoring_failure(['cp', tex_path, '.'], cwd=tempdir)
            run_ignoring_failure(['latex-mk', '--pdflatex', tex_path],
                                 cwd=tempdir, env=tex_env, stdout=log)
            subprocess.check_call(['hevea', '-fix', '-I', src_dir, 'sym.hva',
                                   'latex2html.hva', 'local.hva', filebase],
                                  cwd=tempdir, stdout=log)
        with open(join(tempdir, f'{filebase}.html'), encoding='utf-8') as html_file:
            text = html_file.read()
        # Debugging
        #print(text, file=sys.stderr)
        #print(open(tempdir + "/log").read(), file=sys.stderr)
    return add_title_heading(text)


def main(argv):
    """Convert the file named by argv[1], writing UTF-8 HTML to stdout."""
    if len(argv) < 2:
        sys.exit(f'Usage: {basename(argv[0])} FILE.tex')
    sys.stdout.buffer.write(convert(argv[1]).encode('utf-8'))


if __name__ == '__main__':
    main(sys.argv)
