#!python

'''
Usage:
    jsonLscan -f <filename>
    jsonLscan (-s | -f <filename>) --list <field> --as <output_field> [--prefix=<prefix_string>]
    jsonLscan (-s | -f <filename>) --list <field> [-r] [--prefix=<prefix_string>]
    jsonLscan (-s | -f <filename>) --readfile <field> [--into <output_file>] [--prefix=<prefix_string>]

 Options:
    -r --raw   print the raw output value as text, not JSON
    -f --file   read the JSONL data from a file
    -s --stdin   read the JSONL data from standard input
'''

'''+mdoc+

jsonLscan reads a set of JSONL records (from <filename> if the --file option is set, or from 
standard input if the --stdin option is set) and returns a set of JSONL records. 

If the --list option is set, then each output record is a name/value pair where <name> 
is the actual <field> parameter, and <value> is source_record[<field>].

So processing a sample.json file containing

{"first_name": "John", "last_name": "Smith"}
{"first_name":"Bob", "last_name":"Barker"}

using the command:

jsonLscan -f sample.json --list first_name

yields the output:

{"first_name": "John"}
{"first_name": "Bob"}



Set the --raw parameter to get back only the values, as raw text. So that the command

jsonLscan -f sample.json --list first_name -r

will yield

John
Bob

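(When the --prefix option is set, jsonLscan will prepend <prefix_string> to each output
value, whether it is printed raw or as JSON. With --readfile, <prefix_string> is prepended
to each referenced filename before the file is opened.)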


jsonLscan is capable of indirection. When the --readfile option is set, it will scan records 
in <filename> and read the value of the specified field, not as data, but as a reference to 
ANOTHER file. 

Suppose we have a sample.json file containing 

{"first_name": "John", "last_name": "Smith"}
{"first_name":"Bob", "last_name":"Barker"}

and we have a meta.json file containing

{"filename": "sample.json", "id": 123456}

-- then, if we issue the command

scripts/jsonLscan --file meta.json --readfile filename

jsonLscan will:

1. treat the value of the "filename" field in meta.json as an actual filename (which it is);
2. load the file which that field refers to (in this case, sample.json);
3. print each JSON record in the referenced file.

This will simply give us the contents of sample.json.

+mdoc+
'''


import os, sys
import json

import uuid
import datetime
from collections import namedtuple
from snap import snap, common
from mercury import utils
from mercury import datamap
from mercury.utils import read_stdin
import docopt


def _iter_records(lines):
    '''Yield one decoded JSON object per non-blank line of the iterable *lines*.'''
    for raw_line in lines:
        line = raw_line.strip()
        if line:
            yield json.loads(line)


def _print_listed_field(args, source_record, prefix):
    '''Print the --list projection of a single record.

    The value of args['<field>'] is looked up in *source_record*, prefixed
    exactly once, and emitted in one of three shapes:

      --as    a JSON object keyed by args['<output_field>']
      --raw   the bare (prefixed) value as text
      neither a JSON object keyed by the input field name
    '''
    input_field = args['<field>']

    # NOTE: the value is interpolated into a string, so non-string values
    # (and a missing field, which yields None) are emitted as their str() form.
    # This is the pre-existing output contract and is kept as-is.
    output_data = f'{prefix}{source_record.get(input_field)}'

    if args.get('--as'):
        print(json.dumps({args['<output_field>']: output_data}))
    elif args.get('--raw'):
        print(output_data)
    else:
        print(json.dumps({input_field: output_data}))


def _copy_referenced_file(ref_filename, output_filename, mode):
    '''Copy the lines of *ref_filename* into *output_filename*.

    *mode* is 'x' for the first file written during a run (the target must not
    exist yet) and 'a' for every later one (the target was created by this run).

    Raises:
        Exception: if mode is 'x' and the target file already exists.
    '''
    with open(ref_filename, 'r') as srcfile:
        if mode == 'x' and os.path.isfile(output_filename):
            raise Exception(f'target file {output_filename} already exists.')

        with open(output_filename, mode) as targetfile:
            targetfile.writelines(srcfile)


def _process_records(args, records, prefix):
    '''Apply the selected operation (--list, --readfile, or echo) to each record.'''
    # The output file of --into must be new when the run starts, but a manifest
    # may reference several files; after the first copy we append instead of
    # failing on the file this run has just created.
    write_mode = 'x'

    for source_record in records:
        if args.get('--list'):
            _print_listed_field(args, source_record, prefix)

        elif args.get('--readfile'):
            # treat the field value as a filename; open it and read out the contents...
            ref_filename = f"{prefix}{source_record[args['<field>']]}"

            if args.get('--into'):
                # ...into another file...
                _copy_referenced_file(ref_filename, args['<output_file>'], write_mode)
                write_mode = 'a'
            else:
                # ...or to standard out
                with open(ref_filename, 'r') as srcfile:
                    for src_line in srcfile:
                        print(src_line.strip())

        else:
            print(json.dumps(source_record))


def main(args):
    '''Run jsonLscan against a docopt-style argument dictionary.

    JSONL records are read from standard input (--stdin) or from the file named
    by args['<filename>'] (--file); blank lines are skipped. Both sources go
    through the same per-record processing:

      --list      print the value of args['<field>'] (see --as / --raw)
      --readfile  treat the value of args['<field>'] as a filename and copy that
                  file's contents to standard out, or into args['<output_file>']
                  when --into is set
      (neither)   print each record back out as JSON

    args['--prefix'], when set, is prepended once to every listed value and to
    every filename opened via --readfile.

    If neither --stdin nor --file is set, nothing is done.

    Raises:
        Exception: if --into names a file that already exists.
        KeyError: if a --readfile record lacks the requested field.
    '''
    prefix = args.get('--prefix') or ''

    if args.get('--stdin'):
        _process_records(args, _iter_records(read_stdin()), prefix)

    elif args.get('--file'):
        with open(args['<filename>'], 'r') as f:
            _process_records(args, _iter_records(f), prefix)
    
if __name__ == '__main__':
    # Parse the command line against the usage text held in the module
    # docstring, then pass the resulting option dictionary straight to main().
    main(docopt.docopt(__doc__))

