#!/usr/bin/env python3

import sys
from internetarchivepdf.recode import recode
from internetarchivepdf.mrc import KDU_COMPRESS, KDU_EXPAND, OPJ_COMPRESS, OPJ_DECOMPRESS
from internetarchivepdf.const import (VERSION, PRODUCER,
        IMAGE_MODE_PASSTHROUGH, IMAGE_MODE_PIXMAP, IMAGE_MODE_MRC)
from shutil import which


if __name__ == '__main__':
    import argparse

    # Banner shown at the top of the --help output; VERSION is imported
    # from internetarchivepdf.const at the top of this file.
    description = ('PDF recoder version %s.' % VERSION +
                   ' Compresses PDFs with images and inserts text layers '
                   ' based on hOCR input files.')
    parser = argparse.ArgumentParser(description=description)

    # For Python 3.6, 3.7
    class ExtendAction(argparse.Action):
        """Backport of argparse's 'extend' action for Python 3.6 and 3.7.

        Every occurrence of the option adds all of its values to the list
        stored on the namespace under ``self.dest``.
        """

        def __call__(self, parser, namespace, values, option_string=None):
            # Copy before extending so that a list supplied as the option's
            # ``default`` is never mutated in place; a missing or None
            # attribute starts from an empty list.
            items = list(getattr(namespace, self.dest, None) or [])
            items.extend(values)
            setattr(namespace, self.dest, items)
    # Registered unconditionally under the name 'extend' for this parser.
    parser.register('action', 'extend', ExtendAction)
    # End for Python 3.6, 3.7

    # Input sources, their companion files, and output locations.
    parser.add_argument('-P', '--from-pdf', type=str, default=None,
                        help='Input PDF (containing images) to recode')
    parser.add_argument('-I', '--from-imagestack', type=str, default=None,
                        help='Glob pattern for image stack')
    parser.add_argument('-D', '--dpi', type=int, default=None,
                        help='DPI of input images, supply this to get '
                             'proportional page sizes in resulting PDF')
    parser.add_argument('-T', '--hocr-file', type=str, default=None,
                        help='hOCR file containing page information '
                             '(currently not optional)')
    # The first fragment ends with a space so the two sentences do not run
    # together ("specific.Scandata") in the rendered help text.
    parser.add_argument('-S', '--scandata-file', type=str, default=None,
                        help='archive.org specific. '
                             'Scandata XML file containing information on '
                             'which pages to skip (optional). This is helpful '
                             'if the input PDF is a PDF where certain '
                             'pages have already been skipped, but the hOCR '
                             'still has the pages in its file structure, '
                             'and is also used for page labels (numbering)')
    parser.add_argument('-o', '--out-pdf', type=str, default=None,
                        help='Output file to write recoded PDF to.')
    parser.add_argument('-O', '--out-dir', type=str, default=None,
                        help='Output directory to (also) write images to.')
    parser.add_argument('-R', '--reporter', type=str, default=None,
                        help='Program to launch when reporting progress.')
    # Image handling mode, verbosity, scratch space and progress options.
    parser.add_argument('--grayscale-pdf', action='store_true',
                        default=False,
                        help='Whether to convert all images to grayscale in '
                             'the resulting PDF')
    # The mode list is comma-separated throughout; the original text ran
    # "1 is pixmap" straight into "2 is MRC".
    parser.add_argument('-m', '--image-mode', default=IMAGE_MODE_MRC,
                        type=int,
                        help='Compression mode. 0 is pass-through, 1 is pixmap,'
                             ' 2 is MRC (default is 2). 3 is skip images')
    parser.add_argument('--jbig2', default=False, help='Encode using jbig2',
                        action='store_true')
    parser.add_argument('-v', '--verbose', default=False, action='store_true',
                        help='Verbose output')
    parser.add_argument('--tmp-dir', default=None, type=str,
                        help='Directory to store temporary intermediate images')
    parser.add_argument('--report-every', default=None, type=int,
                        help='Report on status every N pages '
                             '(default is no reporting)')
    parser.add_argument('-t', '--stop-after', default=None, type=int,
                        help='Stop after N pages (default is no stop)')
    # Compression backend selection and quality/downsampling knobs.
    # TODO: Either rename --bg-slope and --fg-slope or add special flags for
    # opj_compress
    parser.add_argument(
        '--use-openjpeg',
        action='store_true',
        default=False,
        help='Use opj_compress and opj_decompress instead of'
             ' kakadu. **Currently the compression quality is'
             ' hardcoded**',
    )
    parser.add_argument(
        '--bg-slope',
        type=int,
        default=44250,
        help='Slope for background layer.'
             ' Default is 44250',
    )
    parser.add_argument(
        '--fg-slope',
        type=int,
        default=45000,
        help='Slope for foreground layer.'
             ' Default is 45000',
    )
    parser.add_argument(
        '--downsample',
        type=int,
        default=None,
        help='Downsample entire image by factor before '
             'processing. Default is no downscaling.',
    )
    parser.add_argument(
        '--bg-downsample',
        type=int,
        default=None,
        help='Downsample background by factor.'
             ' Default is no scaling',
    )
    # Note: the default here is None (not False), so an omitted flag is
    # passed on as None.
    parser.add_argument(
        '--denoise-mask',
        action='store_true',
        default=None,
        help='Denoise mask when MRC algorithm thinks it needs'
             ' to denoise - which is not always.'
             ' Default is off',
    )
    # Per-page high-quality overrides and text-layer visibility.
    parser.add_argument(
        '--hq-pages',
        type=str,
        default=None,
        help='Pages to render in higher quality, provided '
             'as comma separate values, negative indexing is '
             "allowed, e.g.: --hq-pages '1,2,3,4,-4,-3,-2,-1'"
             ' will make the first four and last four pages '
             'of a higher quality. Pages marked as higher '
             'quality will not get downsampled and might use '
             'different slope values (see --hq-bg-slope '
             'and --hq-fg-slope)',
    )
    parser.add_argument(
        '--hq-bg-slope',
        type=int,
        default=43500,
        help='High quality slope for background layer.'
             ' Default is 43500',
    )
    parser.add_argument(
        '--hq-fg-slope',
        type=int,
        default=44500,
        help='High quality slope for foreground layer.'
             ' Default is 44500',
    )
    parser.add_argument(
        '--render-text-lines',
        action='store_true',
        default=False,
        help='Whether to render the text line visible instead '
             'of invisible',
    )
    # Document metadata options; all are optional strings.
    parser.add_argument(
        '--metadata-url',
        type=str,
        default=None,
        help='URL describing document, if any',
    )
    parser.add_argument(
        '--metadata-title',
        type=str,
        default=None,
        help='Title of PDF document',
    )
    parser.add_argument(
        '--metadata-author',
        type=str,
        default=None,
        help='Author of document',
    )
    parser.add_argument(
        '--metadata-creator',
        type=str,
        default=None,
        help='Creator of PDF document',
    )
    # May be given several times and/or with several values; the 'extend'
    # action collects them all into one list.
    parser.add_argument(
        '--metadata-language',
        type=str,
        default=None,
        nargs='+',
        action='extend',
        help='Language of PDF document, see RFC 3066. '
             'If multiple languages are specified, only the '
             'first is added to the PDF catalog, but all of '
             'them will end up in the XMP metadata',
    )
    parser.add_argument(
        '--metadata-subject',
        type=str,
        default=None,
        help='Subjects',
    )
    parser.add_argument(
        '--metadata-creatortool',
        type=str,
        default=None,
        help='Creator tool',
    )


    args = parser.parse_args()

    # An input source (PDF or image stack) and an output PDF are both
    # required. The message names both input options, since either one
    # satisfies the input requirement.
    if (args.from_pdf is None and args.from_imagestack is None) or args.out_pdf is None:
        sys.stderr.write('***** Error: --from-pdf/--from-imagestack or '
                         '--out-pdf missing\n\n')
        parser.print_help()
        sys.exit(1)

    # Exactly one input source may be used at a time.
    if args.from_imagestack is not None and args.from_pdf is not None:
        sys.stderr.write('***** Error: --from-pdf and --from-imagestack '
                         'are mutually exclusive\n\n')
        parser.print_help()
        sys.exit(1)

    # The external compression tools are only checked for MRC mode; fail
    # early with a clear message if the selected pair is not on $PATH.
    if args.image_mode == IMAGE_MODE_MRC:
        if args.use_openjpeg:
            if not (which(OPJ_COMPRESS) and which(OPJ_DECOMPRESS)):
                sys.stderr.write('***** Error: --use-openjpeg is provided but opj_compress and opj_decompress are not found in $PATH\n')
                sys.exit(1)
        else:
            if not (which(KDU_EXPAND) and which(KDU_COMPRESS)):
                sys.stderr.write('***** Error: kakadu is requested (this is the default, pass --use-openjpeg for the alternative compression), but kdu_expand and kdu_compress are not found in $PATH\n')
                sys.exit(1)


    # Hand every parsed option to recode() positionally; the order below
    # must stay exactly as is.
    res = recode(
        # Inputs and outputs
        args.from_pdf,
        args.from_imagestack,
        args.dpi,
        args.hocr_file,
        args.scandata_file,
        args.out_pdf,
        args.out_dir,
        args.reporter,
        # Image handling, verbosity and progress
        args.grayscale_pdf,
        args.image_mode,
        args.jbig2,
        args.verbose,
        args.tmp_dir,
        args.report_every,
        args.stop_after,
        # Compression tuning
        args.use_openjpeg,
        args.bg_slope,
        args.fg_slope,
        args.downsample,
        args.bg_downsample,
        args.denoise_mask,
        # High-quality pages and text rendering
        args.hq_pages,
        args.hq_bg_slope,
        args.hq_fg_slope,
        args.render_text_lines,
        # Document metadata
        args.metadata_url,
        args.metadata_title,
        args.metadata_author,
        args.metadata_creator,
        args.metadata_language,
        args.metadata_subject,
        args.metadata_creatortool,
    )

    # Print each error that recode() returned under the 'errors' key of
    # its result.
    errors = res['errors']
    error_count = len(errors)
    if error_count > 0:
        for error in errors:
            print('Encountered runtime error:', error)
