#!/usr/bin/env python

from pyPheWAS.pyPhewasCorev2 import *
import sys, os
import argparse
import time
import math

def parse_args():
    """Parse the command-line options for this script.

    Reads the options from ``sys.argv`` via ``argparse``.

    Returns:
        argparse.Namespace with the string attributes ``phenotype``,
        ``group`` and ``reg_type`` (all required), ``path`` (defaults to
        ``'.'``), and ``outfile`` / ``phewas_cov`` (both default to ``None``).
    """
    cli = argparse.ArgumentParser(description="pyPheWAS ICD-Phecode Lookup Tool")

    # Options the user must always supply: (flag, help text).
    mandatory = (
        ('--phenotype', 'Name of the phenotype file (e.g. icd9_data.csv)'),
        ('--group', 'Name of the group file (e.g. groups.csv)'),
        ('--reg_type', 'Type of regression that you would like to use (log, lin, or dur)'),
    )
    for flag, description in mandatory:
        cli.add_argument(flag, required=True, type=str, help=description)

    # Options with a fallback value: (flag, default, help text).
    optional = (
        ('--path', '.', 'Path to all input files and destination of output files'),
        ('--outfile', None, 'Name of the output file for the feature matrix'),
        ('--phewas_cov', None, 'PheCode to use as covariates in pyPhewasModel regression'),
    )
    for flag, fallback, description in optional:
        cli.add_argument(flag, required=False, default=fallback, type=str, help=description)

    return cli.parse_args()

"""
Retrieve and validate all arguments.
"""
# Wall-clock start time; the runtime report at the end of the script uses it.
start = time.time()

args = parse_args()
kwargs = {
    # Absolute directory with a trailing separator, so file names can be
    # appended to it directly with '+'.
    'path': os.path.join(os.path.abspath(args.path), ''),
    'phenotypefile': args.phenotype,
    'groupfile': args.group,
    'outfile': args.outfile,
    'phewas_cov': args.phewas_cov,
}

# Validate with explicit raises rather than `assert`: assert statements are
# removed when Python runs with -O, which would silently skip these checks.

# A valid regression type must be one of the keys of regression_map.
if args.reg_type not in regression_map:
    raise ValueError("%s is not a valid regression type" % args.reg_type)
kwargs['reg_type'] = regression_map[args.reg_type]

# Both input files must be named as .csv files.
if not kwargs['phenotypefile'].endswith('.csv'):
    raise ValueError("%s is not a valid phenotype file, must be a .csv file" % (kwargs['phenotypefile']))
if not kwargs['groupfile'].endswith('.csv'):
    raise ValueError("%s is not a valid group file, must be a .csv file" % (kwargs['groupfile']))

# Derive the output file name from the group file if none was given.
if kwargs['outfile'] is None:
    kwargs['outfile'] = "feature_matrix_" + kwargs['groupfile']

# The output file must also be named as a .csv file.
if not kwargs['outfile'].endswith('.csv'):
    raise ValueError("%s is not a valid outputfile, must be a .csv file" % (kwargs['outfile']))

# NOTE: phewas_cov is passed on exactly as given on the command line (a
# string, or None when the option is omitted); no numeric conversion is done.

# Print Arguments
display_kwargs(kwargs)

# Bind each argument to a module-level name explicitly (previously done
# implicitly through locals().update(kwargs)).
path = kwargs['path']
phenotypefile = kwargs['phenotypefile']
groupfile = kwargs['groupfile']
outfile = kwargs['outfile']
phewas_cov = kwargs['phewas_cov']
reg_type = kwargs['reg_type']

"""
Calculate feature matrix
"""
# Load the phenotype records; the regression type is forwarded to the loader.
print("Retrieving phenotype data...")
phenotypes = get_icd_codes(path, phenotypefile, reg_type)

print("Retrieving group data...")
genotypes = get_group_file(path, groupfile)

# The group file must provide a MaxAgeAtVisit column. This is an explicit
# check rather than an `assert`, because assert statements are removed when
# Python runs with -O and the check would then be skipped.
if not {'MaxAgeAtVisit'}.issubset(genotypes.columns):
    raise ValueError("make sure MaxAgeAtVisit is in group file")

print("Generating feature matrix...")
fm = generate_feature_matrix(genotypes, phenotypes, reg_type, phewas_cov)

# NOTE: the name printed here is the base name only; the files actually
# written below carry an additional prefix in front of `outfile`.
print("Saving feature matrices to %s" % (path + outfile))

# fm is indexed positionally: fm[0], fm[1] and fm[2] are each written as
# comma-delimited text to <path><prefix><outfile>, with a "..." progress
# marker printed between consecutive files.
for index, prefix in enumerate(('agg_measures_', 'icd_age_', 'phewas_cov_')):
    if index > 0:
        print("...")
    np.savetxt(path + prefix + outfile, fm[index], delimiter=',')

# Report the total wall-clock runtime since `start`, omitting leading units
# (hours, then minutes) that are zero.
interval = time.time() - start
hour = math.floor(interval / 3600.0)
leftover = interval - hour * 3600  # seconds remaining after the whole hours
minute = math.floor(leftover / 60)
second = math.floor(leftover - minute * 60)

if hour > 0:
    pieces = ('%dh' % hour, '%dm' % minute, '%ds' % second)
elif minute > 0:
    pieces = ('%dm' % minute, '%ds' % second)
else:
    pieces = ('%ds' % second,)
time_str = ':'.join(pieces)

print('pyPhewasLookup Complete\nRuntime: %s' % time_str)

