#!/usr/bin/env python
"""Simple commandline interface for parsing PDF to HTML."""
import argparse
import logging
import pdftotree

def build_parser():
    """Build the argument parser for the command-line interface.

    Returns:
        argparse.ArgumentParser: parser accepting the positional ``pdf_file``
        and the ``-m``, ``-o``, ``-f``, ``-V``, ``-v`` and ``-vv`` options.
    """
    parser = argparse.ArgumentParser(
        description="""
        Script to extract tree structure from PDF files. Takes a PDF as input
        and outputs an HTML-like representation of the document's structure. By
        default, this conversion is done using heuristics. However, a model can
        be provided as a parameter to use a machine-learning-based approach.
        """,
        usage="%(prog)s [options] pdf_file")
    parser.add_argument(
        '-m',
        '--model_path',
        type=str,
        default=None,
        help="Pretrained model, generated by extract_tables tool")
    parser.add_argument(
        'pdf_file',
        type=str,
        help="PDF file name for which tree structure needs to be extracted")
    parser.add_argument(
        '-o',
        '--output',
        type=str,
        help=
        "Path where tree structure should be saved. If none, HTML is printed to stdout."
    )
    # NOTE(review): this value is forwarded as a *string* ("True" by default),
    # so "-f False" yields the non-empty string "False". Confirm how
    # pdftotree.parse interprets it before converting this to a real boolean.
    parser.add_argument(
        '-f',
        '--favor_figures',
        type=str,
        help=
        "Whether figures must be favored over other parts such as tables and section headers",
        default="True")
    # store_true already defaults to False, so no separate set_defaults() call
    # is needed for the flags below.
    parser.add_argument(
        '-V',
        '--visualize',
        dest='visualize',
        action='store_true',
        help="Whether to output visualization images for the tree")
    parser.add_argument(
        '-v',
        '--verbose',
        dest='verbose',
        action='store_true',
        help="Output INFO level logging.")
    parser.add_argument(
        '-vv',
        '--veryverbose',
        dest='debug',
        action='store_true',
        help="Output DEBUG level logging.")
    return parser


def _log_level_from_args(args):
    """Map the verbosity flags to a logging level (debug wins over verbose)."""
    if args.debug:
        return logging.DEBUG
    if args.verbose:
        return logging.INFO
    return logging.ERROR


def main(argv=None):
    """Parse the command line, configure logging and run ``pdftotree.parse``.

    Args:
        argv: List of argument strings to parse. ``None`` (the default) makes
            argparse read the arguments from ``sys.argv``.
    """
    args = build_parser().parse_args(argv)
    log_level = _log_level_from_args(args)

    # Configure logging for this application
    log = logging.getLogger('pdftotree')
    log.setLevel(log_level)

    ch = logging.StreamHandler()
    ch.setLevel(log_level)

    formatter = logging.Formatter("[%(levelname)s] %(name)s - %(message)s")
    ch.setFormatter(formatter)

    log.addHandler(ch)

    # Call the main routine
    result = pdftotree.parse(args.pdf_file, args.output, args.model_path,
                             args.favor_figures, args.visualize)

    if result:
        print(result)
    elif args.output:
        print("HTML output to {}".format(args.output))
    else:
        # Nothing was returned and no output path was requested, so there is
        # no file to point the user at. Previously this case printed the
        # misleading line "HTML output to None".
        log.error("No HTML output was produced for %s", args.pdf_file)


if __name__ == '__main__':
    main()
