#!/usr/bin/env python3
#
# (c) 2018 Fetal-Neonatal Neuroimaging & Developmental Science Center
#                   Boston Children's Hospital
#
#              http://childrenshospital.org/FNNDSC/
#                        dev@babyMRI.org
#

import sys, os
sys.path.insert(1, os.path.join(os.path.dirname(__file__), '../pfdicom_tagSub'))

import  pfdicom_tagSub
from    argparse            import RawTextHelpFormatter
from    argparse            import ArgumentParser
import  pudb

import  pfmisc
from    pfmisc._colors      import Colors
from    pfmisc              import other

# Version string reported by --version and embedded in the banner below.
str_version = "2.0.6"

# Banner shown ahead of the --man / --synopsis text and used as the argparse
# description.
#
# The first literal is a *raw* string: the ASCII art is full of backslashes
# that are not valid escape sequences ("\ ", "\|", "\_"). In a normal string
# these trigger a SyntaxWarning on recent Python versions; in a raw string
# the resulting text is identical and no warning is emitted.
str_desc = Colors.CYAN + r"""

        __    _ _                       _               _____       _     
       / _|  | (_)                     | |             /  ___|     | |    
 _ __ | |_ __| |_  ___  ___  _ __ ___  | |_  __ _  __ _\ `--. _   _| |__  
| '_ \|  _/ _` | |/ __|/ _ \| '_ ` _ \ | __|/ _` |/ _` |`--. \ | | | '_ \ 
| |_) | || (_| | | (__| (_) | | | | | || |_| (_| | (_| /\__/ / |_| | |_) |
| .__/|_| \__,_|_|\___|\___/|_| |_| |_| \__|\__,_|\__, \____/ \__,_|_.__/ 
| |                                 ______         __/ |                  
|_|                                |______|       |___/                   




                        Path-File DICOM tag substitution

        Recursively walk down a directory tree and process DICOM tags,
        saving each source DICOM in an output tree that  preserves the
        input directory structure.

        Basically a DICOM anonymizer.

                             -- version """ + \
             Colors.YELLOW + str_version + Colors.CYAN + """ --

        'pfdicom_tagSub' is a customizable and friendly DICOM tag substitutor.
        As part of the "pf*" suite of applications, it is geared to IO as
        directories. Input DICOM trees are reconstructed in an output
        directory, preserving directory structure. Each node tree contains
        a copy of the original DICOM with a user-specified tag list changed
        in the output.

        `pfdicom_tagSub` is typically called with a JSON structure defining
        the DICOM tag name to substitute, along with the substitute value.
        Individual tags can be explicitly referenced, as well as a regular
        expression construct to capture all tags satisfying that expression.
        This allows for capturing all tags with a certain string pattern
        without needing to explicitly list every conforming tag.


""" + Colors.NO_COLOUR

def synopsis(ab_shortOnly = False):
    """
    Return the help text for the script.

    Args:
        ab_shortOnly: if True, return only the brief part (NAME, SYNOPSIS
                      and BRIEF EXAMPLE); otherwise return the brief part
                      followed by the full DESCRIPTION / ARGS / EXAMPLES.

    Returns:
        The requested help text as a single string.

    The ``--tagStruct`` examples embedded in the text are valid JSON so
    that they can be copied verbatim onto a command line.
    """
    shortSynopsis =  """
    NAME

        pfdicom_tagSub

    SYNOPSIS

        pfdicom_tagSub                                                      \\
                     -I|--inputDir <inputDir>                               \\
                    [-i|--inputFile <inputFile>]                            \\
                    [-e|--extension <DICOMextension>]                       \\
                    [-F|--tagFile <tagFile>] | [-T|--tagStruct <tagStruct>] \\
                    [--threads <numThreads>]                                \\
                     -O|--outputDir <outputDir>                             \\
                    [-x|--man]                                              \\
                    [-y|--synopsis]                                         \\
                    [--followLinks]                                         \\
                    [--json]

    BRIEF EXAMPLE

        pfdicom_tagSub                                                      \\
            -e dcm                                                          \\
            -I /var/www/html/normsmall                                      \\
            -O /var/www/html/anon                                           \\
            --tagStruct '
            {
                "PatientName":              "%_name|patientID_PatientName",
                "PatientID":                "%_md5|7_PatientID",
                "AccessionNumber":          "%_md5|8_AccessionNumber",
                "PatientBirthDate":         "%_strmsk|******01_PatientBirthDate",
                "re:.*hysician":            "%_md5|4_#tag",
                "re:.*stitution":           "#tag",
                "re:.*ddress":              "#tag"
            }
            ' --threads 0 --printElapsedTime
    """

    description =  '''
    DESCRIPTION

        ``pfdicom_tagSub`` replaces a set of ``<tag, value>`` pairs in a DICOM
        header with values passed in a JSON structure (either from the CLI or
        read from a JSON file).

        Individual DICOM tags can be explicitly referenced in the JSON structure,
        as well as a regular expression construct to capture all tags satisfying
        that expression. This allows for capturing all tags with a certain string
        pattern without needing to explicitly list every conforming tag.

        Tag regular expression constructs are ``python`` string expressions and
        are prefixed by ``"re:<pythonRegex>"``. For example, ``"re:.*hysician"``
        will perform some substitution on all tags that contain the letters
        ``hysician``. The value substitution has access to a special lookup,
        ``#tag``, which is the current tag hit. It is possible to apply built in
        functions to the tag hit, for example ``md5`` hashing, using
        ``"%_md5|4_#tag"``,

            {
                "re:.*hysician":                "%_md5|4_#tag"
            }

        will be expanded to


            {
                "PerformingPhysiciansName" :    "%_md5|4_PerformingPhysiciansName",
                "PhysicianOfRecord"        :    "%_md5|4_PhysicianOfRecord",
                "ReferringPhysiciansName"  :    "%_md5|4_ReferringPhysiciansName",
                "RequestingPhysician"      :    "%_md5|4_RequestingPhysician"
            }

        The tag regular expression construct allows for simple and powerful bulk
        substitution of ``<tag, value>`` pairs.

        The script accepts an ``<inputDir>``, and then from this point an
        ``os.walk()`` is performed to extract all the subdirs. Each subdir is
        examined for DICOM files (in the simplest sense by a file extension mapping)
        which are passed to a processing method that reads and replaces specified
        DICOM tags, saving the result in a corresponding directory and filename
        in the output tree.

    ARGS

        -I|--inputDir <inputDir>
        Input DICOM directory to examine. By default, the first file in this
        directory is examined for its tag information. There is an implicit
        assumption that each <inputDir> contains a single DICOM series.

        -i|--inputFile <inputFile>
        An optional <inputFile> specified relative to the <inputDir>. If
        specified, then do not perform a directory walk, but convert only
        this file.

        -e|--extension <DICOMextension>
        An optional extension to filter the DICOM files of interest from the
        <inputDir>.

        [-O|--outputDir <outputDir>]
        The output root directory that will contain a tree structure identical
        to the input directory, and each "leaf" node will contain the analysis
        results.

        [--outputLeafDir <outputLeafDirFormat>]
        If specified, will apply the <outputLeafDirFormat> to the output
        directories containing data. This is useful to blanket describe
        final output directories with some descriptive text, such as
        'anon' or 'preview'.

        This is a formatting spec, so

            --outputLeafDir 'preview-%%s'

        where %%s is the original leaf directory node, will prefix each
        final directory containing output with the text 'preview-' which
        can be useful in describing some features of the output set.

        -F|--tagFile <JSONtagFile>
        Parse the tags and their "subs" from a JSON formatted <JSONtagFile>.

        -T|--tagStruct <JSONtagStructure>
        Parse the tags and their "subs" from a JSON formatted <JSONtagStructure>
        passed directly in the command line.

        -o|--outputFileStem <outputFileStem>
        The output file stem to store data. This should *not* have a file
        extension, or rather, any "." in the name are considered part of
        the stem and are *not* considered extensions.

        [--threads <numThreads>]
        If specified, break the innermost analysis loop into <numThreads>
        threads.

        [-x|--man]
        Show full help.

        [-y|--synopsis]
        Show brief help.

        [--json]
        If specified, output a JSON dump of final return.

        [--followLinks]
        If specified, follow symbolic links.

        -v|--verbosity <level>
        Set the app verbosity level.

            0: No internal output;
            1: Run start / stop output notification;
            2: As with level '1' but with simpleProgress bar in 'pftree';
            3: As with level '2' but with list of input dirs/files in 'pftree';
            5: As with level '3' but with explicit file logging for
                    - read
                    - analyze
                    - write

    EXAMPLES

    Perform a DICOM anonymization by processing specific tags:

        pfdicom_tagSub                                      \\
            -e dcm                                          \\
            -I /var/www/html/normsmall                      \\
            -O /var/www/html/anon                           \\
            --tagStruct '
            {
                "PatientName":              "%_name|patientID_PatientName",
                "PatientID":                "%_md5|7_PatientID",
                "AccessionNumber":          "%_md5|8_AccessionNumber",
                "PatientBirthDate":         "%_strmsk|******01_PatientBirthDate",
                "re:.*hysician":            "%_md5|4_#tag",
                "re:.*stitution":           "#tag",
                "re:.*ddress":              "#tag"
            }
            ' --threads 0 --printElapsedTime

        will replace the explicitly named tags as shown:

        * the ``PatientName`` value will be replaced with a Fake Name,
          seeded on the ``PatientID``;

        * the ``PatientID`` value will be replaced with the first 7 characters
          of an md5 hash of the ``PatientID``;

        * the ``AccessionNumber``  value will be replaced with the first 8
          characters of an md5 hash of the `AccessionNumber`;

        * the ``PatientBirthDate`` value will set the final two characters,
          i.e. the day of birth, to ``01`` and preserve the other birthdate
          values;

        * any tags with the substring ``hysician`` will have their values
          replaced with the first 4 characters of the corresponding tag value
          md5 hash;

        * any tags with ``stitution`` and ``ddress`` substrings in the tag
          contents will have the corresponding value simply set to the tag
          name.

        NOTE:

        Spelling matters! Especially with the substring bulk replace, please
        make sure that the substring has no typos, otherwise the target tags
        will most probably not be processed.

    '''

    if ab_shortOnly:
        return shortSynopsis
    else:
        return shortSynopsis + description



# Command line interface. The colourised banner doubles as the argparse
# description; RawTextHelpFormatter keeps its layout intact in --help.
parser  = ArgumentParser(formatter_class = RawTextHelpFormatter,
                         description     = str_desc)

# One entry per option: (flag spellings, add_argument keyword options).
# Entries are registered in listed order, which is also the --help order.
# Boolean switches use 'store_true' and default to False; --inputDir declares
# no default, every other value option declares a string default.
_l_cliSpec = [
    (("-I", "--inputDir"),
        dict(dest = 'inputDir',         help = "input dir")),
    (("-i", "--inputFile"),
        dict(dest = 'inputFile',        help = "input file",
             default = '')),
    (("-e", "--extension"),
        dict(dest = 'extension',        help = "DICOM file extension",
             default = '')),
    (("-F", "--tagFile"),
        dict(dest = 'tagFile',          help = "JSON formatted file containing tags to sub",
             default = '')),
    (("-T", "--tagStruct"),
        dict(dest = 'tagStruct',        help = "JSON formatted tag sub struct",
             default = '')),
    (("-o", "--outputFileStem"),
        dict(dest = 'outputFileStem',   help = "output file",
             default = "")),
    (("-O", "--outputDir"),
        dict(dest = 'outputDir',        help = "output image directory",
             default = '.')),
    (("--printElapsedTime",),
        dict(dest = 'printElapsedTime', help = "print program run time",
             action = 'store_true',     default = False)),
    (("--threads",),
        dict(dest = 'threads',          help = "number of threads for innermost loop processing",
             default = "0")),
    (("--outputLeafDir",),
        dict(dest = 'outputLeafDir',    help = "formatting spec for output leaf directory",
             default = "")),
    (("-x", "--man"),
        dict(dest = 'man',              help = "man",
             action = 'store_true',     default = False)),
    (("-y", "--synopsis"),
        dict(dest = 'synopsis',         help = "short synopsis",
             action = 'store_true',     default = False)),
    (("--json",),
        dict(dest = 'json',             help = "output final return in json",
             action = 'store_true',     default = False)),
    (("--followLinks",),
        dict(dest = 'followLinks',      help = "follow symbolic links",
             action = 'store_true',     default = False)),
    (("-v", "--verbosity"),
        dict(dest = 'verbosity',        help = "verbosity level for app",
             default = "1")),
    (("--version",),
        dict(dest = 'b_version',        help = 'if specified, print version number',
             action = 'store_true',     default = False)),
]

for _t_flags, _d_options in _l_cliSpec:
    parser.add_argument(*_t_flags, **_d_options)

args = parser.parse_args()

# Help requests short-circuit the run: print the banner, then the help text.
if args.man or args.synopsis:
    print(str_desc)
    # --man takes precedence over --synopsis and selects the long form.
    print(synopsis(ab_shortOnly = not args.man))
    # NOTE(review): exits with status 1 although help was printed
    # successfully -- confirm whether any caller relies on this.
    sys.exit(1)

# Version requests likewise stop here (same exit status as the help path).
if args.b_version:
    print("Version: %s" % str_version)
    sys.exit(1)

# Hand the parsed command line values straight to the worker object,
# one keyword per CLI option it consumes.
d_tagSubArgs = {
    'inputDir':         args.inputDir,
    'inputFile':        args.inputFile,
    'extension':        args.extension,
    'outputDir':        args.outputDir,
    'outputFileStem':   args.outputFileStem,
    'outputLeafDir':    args.outputLeafDir,
    'tagFile':          args.tagFile,
    'tagStruct':        args.tagStruct,
    'threads':          args.threads,
    'followLinks':      args.followLinks,
    'verbosity':        args.verbosity,
    'json':             args.json,
}
pf_dicom_tagSub = pfdicom_tagSub.pfdicom_tagSub(**d_tagSubArgs)

# And now run it!
# (uncomment the next line to break into the pudb debugger first)
# pudb.set_trace()
d_pfdicom_tagSub = pf_dicom_tagSub.run(timerStart = True)

# The run's return dictionary carries the elapsed time under 'runTime'.
if args.printElapsedTime:
    str_elapsed = "Elapsed time = %f seconds" % d_pfdicom_tagSub['runTime']
    pf_dicom_tagSub.dp.qprint(str_elapsed)

sys.exit(0)
