#!python

'''
Usage:
    chunkr --filenames <listfile> --src-dir <directory> --chunks <count> --pfx <outfile_prefix> --ext <extension> [-t <outdir>] [limit=<max_files>]
    chunkr --records <datafile> --chunks <count> --pfx <outfile_prefix> --ext <extension> [-t <outdir>] [limit=<max_records>]

Options:
    -t --target-dir  send chunked datafiles to this directory
'''

'''
+mdoc+

chunkr splits a list of strings into N chunks, where N is specified by

--chunks <count>

It has two modes: filename-mode (selected by passing --filenames <listfile>) and record-mode
(selected by passing --records <datafile>).

In filename mode, it will treat the <listfile> parameter as a meta-list; that is, it will compile
its master list of records to be chunked by reading every file named in <listfile>. Each name is
resolved relative to the directory given by --src-dir <directory>.

In record mode, it will treat <datafile> as its master list of records to be chunked.

Each chunkfile generated by chunkr will be named in the format <outfile_prefix>_K.<extension>,
where K is the chunk number (1 through N). By default it will write chunkfiles to the current
directory, but it can write them to the directory specified by the optional <outdir> parameter.

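The optional limit=<N> parameter is meant to limit processing to N input records.
(NOTE: the code in this file does not currently read this parameter, so no limit is applied.)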

+mdoc+
'''


import os, sys
import json
import docopt
from snap import common
from collections import namedtuple

# Pairs a chunk file's bare name (no directory part) with its open handle.
FileRef = namedtuple('FileRef', 'name handle')


def generate_filerefs(chunks, prefix, extension, outdir=None):
    '''Create the output chunk files and return them as open FileRefs.

    Files are named <prefix>_1.<extension> through <prefix>_<chunks>.<extension>
    and are opened in exclusive-create mode ('x'), so an already-existing
    chunk file is never overwritten.

    Args:
        chunks: number of chunk files to create.
        prefix: filename prefix shared by every chunk file.
        extension: filename extension, without the leading dot.
        outdir: directory to create the files in; when falsy, paths are
            relative to the current working directory.

    Returns:
        A list of FileRef tuples in chunk order. Each `name` is the bare
        filename and each `handle` is open for writing; the caller is
        responsible for closing the handles.

    Raises:
        FileExistsError: if one of the chunk files already exists.
        OSError: if a chunk file cannot be created for any other reason.
    '''
    ref_list = []
    try:
        for index in range(chunks):
            filename = f'{prefix}_{index + 1}.{extension}'
            filepath = os.path.join(outdir, filename) if outdir else filename
            ref_list.append(FileRef(name=filename, handle=open(filepath, 'x')))
    except Exception:
        # The caller never receives the partial list, so the handles opened
        # before the failure must be released here or they would leak.
        for ref in ref_list:
            ref.handle.close()
        raise

    return ref_list



def _iter_records(lines):
    '''Yield each line of `lines` stripped of surrounding whitespace, skipping blank lines.'''
    for raw_line in lines:
        record = raw_line.strip()
        if record:
            yield record


def _write_record_chunks(datafile_name, chunk_count, prefix, extension, outdir):
    '''Record mode: deal the records of one datafile round-robin into chunk files.'''
    # Open the input first: if it is missing we fail before any chunk file
    # has been created, instead of leaving empty chunk files behind.
    with open(datafile_name, 'r') as datafile:
        file_refs = generate_filerefs(chunk_count, prefix, extension, outdir)
        try:
            for position, record in enumerate(_iter_records(datafile)):
                handle = file_refs[position % chunk_count].handle
                handle.write(record)
                handle.write('\n')
        finally:
            # Close every chunk handle even when reading or writing fails.
            for ref in file_refs:
                ref.handle.close()

    for ref in file_refs:
        print(ref.name)


def _write_filename_chunks(listfile_name, source_directory, chunk_count,
                           prefix, extension, output_directory):
    '''Filename mode: pool the records of every listed file, then write them out in chunks.'''
    with open(listfile_name, 'r') as listfile:
        source_paths = [
            os.path.join(source_directory, name) if source_directory else name
            for name in _iter_records(listfile)
        ]

    # All sources are read before anything is written; records are dealt
    # round-robin across the chunks in the order they are encountered.
    chunked_records = [[] for _ in range(chunk_count)]
    position = 0
    for source_path in source_paths:
        with open(source_path, 'r') as source:
            for record in _iter_records(source):
                chunked_records[position % chunk_count].append(record)
                position += 1

    for index, records in enumerate(chunked_records, start=1):
        outfile_name = f'{prefix}_{index}.{extension}'
        outfile_path = os.path.join(output_directory, outfile_name)
        with open(outfile_path, 'w') as outfile:
            outfile.writelines(f'{record}\n' for record in records)

        print(outfile_name)


def main(args):
    '''Split input records into chunk files according to the parsed command line.

    Args:
        args: mapping of parsed command-line values. The keys read here are
            '<count>', '<outfile_prefix>', '<extension>', '--records',
            '<datafile>', '--filenames', '<listfile>', '--src-dir',
            '<directory>', '--target-dir' and '<outdir>'.

    In record mode (args['--records']) the non-blank lines of <datafile> are
    distributed round-robin over <count> chunk files created through
    generate_filerefs. In filename mode (args['--filenames']) every file named
    in <listfile> is read, and the pooled non-blank lines are distributed
    round-robin over <count> chunk files, which are overwritten if they exist.
    In both modes the bare name of each chunk file is printed to stdout.

    NOTE(review): the usage text advertises a limit parameter, but nothing in
    this function reads it, so no limit is applied.

    Raises:
        ValueError: if <count> is not an integer or is less than 1.
        OSError: if an input file cannot be read or a chunk file cannot be written.
    '''
    chunk_count = int(args['<count>'])
    if chunk_count < 1:
        # Fail up front with a clear message rather than with a
        # ZeroDivisionError / IndexError from the round-robin arithmetic.
        raise ValueError(f'--chunks must be a positive integer, got {chunk_count}')

    prefix = args['<outfile_prefix>']
    extension = args['<extension>']

    if args['--records']:
        outdir = args['<outdir>'] if args['--target-dir'] else None
        _write_record_chunks(args['<datafile>'], chunk_count, prefix, extension, outdir)

    elif args['--filenames']:
        source_directory = None
        if args['--src-dir']:
            source_directory = os.path.join(os.getcwd(), args['<directory>'])

        output_directory = args['<outdir>'] if args['--target-dir'] else os.getcwd()
        _write_filename_chunks(
            args['<listfile>'],
            source_directory,
            chunk_count,
            prefix,
            extension,
            output_directory,
        )
        

if __name__ == '__main__':
    # Parse the command line against the module's usage docstring and hand
    # the resulting mapping straight to main().
    main(docopt.docopt(__doc__))