#!python

import os.path
import pandas as pd
from cemconvert.run_parse import RunOpts
from cemconvert.qa import write_hourly_qa
from cemconvert.ff10 import FF10
from cemconvert.cem import CEM
from cemconvert.temporal import Temporal

def main():
    '''
    Build the monthly hourly FF10 files, the monthly QA files and the updated annual
     inventory from the annual FF10 and the hourly CEM inputs

    All inputs and settings come from RunOpts. Outputs are written under opts.output_path:
     pthour_MM_YEAR.csv and qa_pthour_MM_YEAR.csv for each month, and ptinv_YEAR.csv
    '''
    opts = RunOpts()
    ### Add in written QA products
    eisids = ['facility_id','unit_id','rel_point_id','process_id']
    orisids = ['oris_facility_code','oris_boiler_id']
    inv = FF10(opts)
    # Get full annual FF10 and header
    ann_head, ann_ff10 = inv.read_ann_ff10(opts.ann_ff10)
    # Define metadata for the hourly processing
    fips = ann_ff10[['facility_id','country_cd','region_cd']].drop_duplicates('facility_id')
    sccs = ann_ff10[['unit_id','process_id','scc']].drop_duplicates(['unit_id','process_id'])
    annemis = inv.extract_ann_emis(ann_ff10)
    hourly = proc_hourly(opts)
    # Calculate the unit-level CEMs temporal factors for annual->hourly
    temp = Temporal(opts)
    cem_temporal = temp.calc_cem_temporal(hourly)
    # Setup an empty dataframe to hold the annual values
    annual = pd.DataFrame()
    # Copy over the hourly CEM values and temporalize the annual inventory emissions to hourly
    hourly = set_key(hourly[hourly['poll'].isin(opts.cempolls)].copy())
    anndef = set_key(annemis[annemis['poll'].isin(opts.cempolls)].copy())
    # Merge in and apply the unit to process ID fractions
    # The column list is captured before the join so the monthly subset keeps the CEM
    #  columns plus the EIS IDs and drops the '_ff10' suffixed inventory columns
    hrcols = list(hourly.columns) + eisids
    # Left join on the key index: inventory rows without CEM data get a null month
    hourly = anndef.join(hourly, lsuffix='_ff10')
    valcols = ['daytot',]+['hrval%s' %hr for hr in range(24)]
    # Missing values count as 0 and a missing unit_frac counts as 1 (no split)
    hourly[valcols] = hourly[valcols].fillna(0).multiply(hourly['unit_frac'].fillna(1), axis=0)
    # Loop over the months in the file. Doing this on the annual seems like a big memory sink
    #  and this could eventually be parallelized
    for month in list(hourly.loc[hourly['month'].notnull(), 'month'].drop_duplicates()):
        print('Month %s' %int(month), flush=True)
        hourlymth = hourly.loc[hourly['month'] == month, hrcols].copy()
        mthtemp = cem_temporal[cem_temporal['month'] == month]
        # Temporalize non-CEM variables
        for poll in opts.calcpolls:
            print(poll)
            hourly_poll = temp.apply_temporal(annemis[annemis['poll'] == poll], mthtemp)
            # pd.concat replaces DataFrame.append, which was removed in pandas 2.0
            hourlymth = pd.concat((hourlymth, set_key(hourly_poll)))
        # Find units where the CEMs NOX/SO2/CO2 is 0 annually and replace with temporalized annual
        for poll in opts.cempolls:
            zunit = hourlymth.loc[hourlymth['poll'] == poll].groupby(level=0)['daytot'].sum()
            zunit = zunit[zunit == 0].index.unique()
            if len(zunit) > 0:
                # NOTE(review): selecting the single 'daytot' label returns a Series and a
                #  pandas Series has no join method, so this branch looks like it would
                #  raise when reached. Confirm the intended frame and join key
                unitreplace = hourly.loc[zunit, 'daytot']
                hourlymth.drop(zunit, inplace=True)
                unitreplace = unitreplace.join(annemis, lsuffix='_old')
                hourly_poll = temp.apply_temporal(unitreplace, mthtemp)
                hourlymth = pd.concat((hourlymth, set_key(hourly_poll)))
        # Fill in the HOURACT variable for temporalization of other variables
        mthtemp = mthtemp.merge(annemis[eisids+orisids].drop_duplicates(), on=orisids, how='left')
        mthtemp['poll'] = opts.temporalvar
        hourlymth = pd.concat((hourlymth, set_key(mthtemp)))
        hourlymth = proc_hourly_meta(hourlymth, fips, sccs)
        fn = os.path.join(opts.output_path, 'pthour_%0.2d_%s.csv' %(month, opts.year))
        inv.write_monthly_ff10(hourlymth, fn)
        # Append the daytot from the hourly to the annual dataframe
        annual = pd.concat((annual, hourlymth[eisids+orisids+['poll','month','daytot']]))
        fn = os.path.join(opts.output_path, 'qa_pthour_%0.2d_%s.csv' %(month, opts.year))
        write_hourly_qa(fn, hourly, hourlymth)
    fn = os.path.join(opts.output_path, 'ptinv_%s.csv' %opts.year)
    inv.write_annual(fn, annual, ann_ff10, opts.year)

def proc_hourly(opts):
    '''
    Load the monthly CAMPD hourly CEM files for every month number in opts.months
    Optionally (opts.write_cems) write each month back out in the old CEM format
    Return all months stacked in the pivoted hours-as-columns layout with an integer
     month column taken from characters 4-5 of the date string
    '''
    # File name labels by month number. Note the non-standard 'july' and 'sept' labels
    month_labels = ('jan','feb','mar','apr','may','jun','july','aug','sept','oct','nov','dec')
    reader = CEM()
    # Start from an empty dataframe and stack each pivoted month onto it
    stacked = pd.DataFrame()
    for month_num in opts.months:
        label = month_labels[month_num-1]
        print('Processing %s' %label, flush=True)
        # Read in CAMPD CEM inputs
        in_path = os.path.join(opts.input_path, 'campd-%s-%s-hourly.txt' %(opts.year, label))
        raw_month = reader.read_cems_month(in_path)
        if opts.write_cems:
            # Write out old CEM format
            out_path = os.path.join(opts.output_path, 'HOUR_UNIT_%s_%0.2d.txt' %(opts.year, month_num))
            reader.write_old_cems(out_path, raw_month)
        # Pivot the hourly values to columns
        pivoted = reader.pivot_hourly(raw_month)
        stacked = pd.concat((stacked, pivoted))
    # Presumably the date string is YYYYMMDD so positions 4-5 hold the month -- confirm
    stacked['month'] = stacked['date'].str[4:6].astype(int)
    return stacked

def proc_hourly_meta(hourlymth, fips, sccs, nullfips_path='nullfips.csv'):
    '''
    Add in fips and sccs and keep only the records with a positive daily total

    hourlymth: hourly records; the index is moved to a column on a copy so that the
     dataframe passed in by the caller is left unchanged
    fips: lookup left-merged on facility_id, must supply region_cd
    sccs: lookup left-merged on unit_id and process_id
    nullfips_path: CSV path for the records that have no region_cd after the merge.
     The file is overwritten on every call. Defaults to nullfips.csv in the working directory

    Returns a copy of the records that have daytot > 0 and a non-null region_cd
    '''
    # Non-inplace reset so the caller's dataframe is not mutated as a side effect
    hourlymth = hourlymth.reset_index()
    hourlymth = hourlymth.merge(fips, on='facility_id', how='left')
    hourlymth = hourlymth.merge(sccs, on=['unit_id','process_id'], how='left')
    # A null daytot is treated as 0 and dropped along with the zero and negative totals
    hourlymth = hourlymth[hourlymth['daytot'].fillna(0) > 0].copy()
    # Records whose facility did not match the fips lookup are reported and then excluded
    nofips = hourlymth['region_cd'].isnull()
    hourlymth[nofips].to_csv(nullfips_path, index=False)
    return hourlymth[~nofips].copy()

def set_key(df):
    '''
    Index the dataframe by an ORIS facility, ORIS boiler and pollutant key

    The key is the three values joined with underscores. The dataframe is modified in
     place and the same object is returned
    '''
    key_parts = ['oris_facility_code','oris_boiler_id','poll']
    # Row-wise join, so all three columns have to hold strings
    joined = df[key_parts].agg('_'.join, axis=1)
    df['key'] = joined
    df.set_index('key', inplace=True)
    return df


# Run the conversion only when this file is executed as a script, not when it is imported
if __name__ == '__main__':
    main()

