#!/usr/bin/env python

import sys
import json
import argparse
import h5_to_json as h5j

def main():
    """Command-line entry point: serialize an hdf5 file to JSON.

    Parses the command line, converts the file at ``h5_path`` with
    ``h5j.h5_to_dict`` and emits the result as JSON indented by 4 spaces.
    If at least one ``json_out_path`` is given, the JSON is written to the
    first one (any further values are ignored) and a confirmation line is
    printed; otherwise the JSON itself is printed to stdout.

    While arguments are parsed and the conversion runs, ``sys.stdout`` is
    temporarily rebound to ``sys.stderr``, so anything printed during that
    phase ends up on stderr (presumably to keep stdout clean for the JSON
    document). The original ``sys.stdout`` is restored even if parsing
    exits or the conversion raises.
    """
    old_stdout = sys.stdout
    sys.stdout = sys.stderr
    try:
        parser = argparse.ArgumentParser(description='Serialize hdf5 file to JSON with option to separate out binary datasets')
        parser.add_argument('h5_path', help='Path to the hdf5 file')
        parser.add_argument('json_out_path', nargs='*', help='Optional path to the output JSON file or use -- if not provided, writes to stdout')
        parser.add_argument('--include-datasets', help='Whether to include the binary data of datasets inside the JSON content (not a great idea for large datasets b/c the JSON serialization is inefficient)', required=False, action='store_true')
        parser.add_argument('--include-dataset-names', help='Comma-separated list of names of datasets to include in the JSON content', required=False, default=None)
        parser.add_argument('--data-dir', '-d', help='Directory for storing binary data by hash', required=False, default=None)

        args = parser.parse_args()

        # The option arrives as one comma-separated string; pass None when
        # it was not supplied (or was empty).
        include_dataset_names = None
        if args.include_dataset_names:
            include_dataset_names = args.include_dataset_names.split(',')

        X = h5j.h5_to_dict(
            args.h5_path,
            include_datasets=args.include_datasets,
            data_dir=args.data_dir,
            include_dataset_names=include_dataset_names,
        )
    finally:
        # Always undo the redirect: without this, a SystemExit from argparse
        # or an exception from the conversion would leave sys.stdout pointing
        # at stderr for whoever called main().
        sys.stdout = old_stdout

    if not args.json_out_path:
        print(json.dumps(X, indent=4))
    else:
        # Only the first positional output path is used.
        out_path = args.json_out_path[0]
        with open(out_path, 'w') as f:
            json.dump(X, f, indent=4)
        print('Wrote to {}'.format(out_path))

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
