#!/usr/bin/env python
import httplib2, re
import numpy as np
from socket import getfqdn
from pprint import pformat, pprint
from time import time
from json import loads
from execo import logger, Process, SshProcess, sleep, Host
from execo.log import style
from execo_g5k import OarSubmission, oarsub, get_current_oar_jobs, get_oar_job_info, \
    wait_oar_job_start, get_host_attributes, get_oar_job_nodes, default_frontend_connection_params
from argparse import ArgumentParser, RawTextHelpFormatter
from execo_engine import copy_outputs
from execo_g5k.config import default_frontend_connection_params

# Command-line interface.
#
# The automatic -h/--help is disabled (add_help=False) and re-declared by hand
# so that it is listed inside the "General options" group.
prog = 'kwapi-g5k-check'
description = ('This tool check the consistency of Kwapi energy monitoring system.'
               ' It performs a reservation if no running job is found ')
parser = ArgumentParser(add_help=False,
                        formatter_class=RawTextHelpFormatter,
                        description=description,
                        prog=prog)

optinout = parser.add_argument_group(
    style.host("General options"),
    "Define mode and controls I/O.")
optinout.add_argument(
    "-h", "--help",
    help="show this help message and exit",
    action="help")
# Stored as args.job_name.
optinout.add_argument(
    "-j", "--job-name",
    help="Name of the job to be used for the check.",
    default="Kwapi-check")
# Stored as args.kwapi_machine.
optinout.add_argument(
    "-m", "--kwapi-machine",
    help="name of the kwapi instance",
    default='kwapi')

# -q and -v cannot be given together. Both are store_true flags, so argparse
# derives their destinations (args.quiet / args.verbose) from the long option
# names and defaults them to False.
optio = optinout.add_mutually_exclusive_group()
optio.add_argument(
    "-q", "--quiet",
    help="Run without printing anything",
    action="store_true")
optio.add_argument(
    "-v", "--verbose",
    help="Run in verbose mode",
    action="store_true")

args = parser.parse_args()

# Logger verbosity: --verbose wins over --quiet, INFO is the default.
logger.setLevel('DEBUG' if args.verbose
                else 'WARN' if args.quiet
                else 'INFO')

# Both arguments of copy_outputs receive the same file name.
copy_outputs(*(['kwapi_check.log'] * 2))

# Configuration
# Duration, in seconds, passed to `stress --timeout` for each host.
stress_time = 30
# The site name is taken as the second label of the local fully qualified
# domain name.
site = getfqdn().split('.')[1]

# Retrieving job or submitting a new one
# Reuse a current job on this site whose name matches --job-name; otherwise
# submit a new job requesting all nodes.
logger.info('Checking for a running job')
current_jobs = get_current_oar_jobs([site])
job_id = None
for candidate in current_jobs:
    info = get_oar_job_info(candidate[0], candidate[1])
    # The job info dict does not always carry a 'name' entry, so a plain
    # subscript would abort the search on the first such job.
    if info.get('name') == args.job_name:
        job_id = candidate[0]
        break

if job_id:
    logger.info('Running job found, %s', job_id)
    job = (job_id, site)
else:
    logger.info('No running job, submitting a new one')
    submission = OarSubmission(resources="nodes=ALL",
                               name=args.job_name,
                               job_type='allow_classic_ssh')
    job = oarsub([(submission, site)])[0]

logger.info('Waiting for job start')
wait_oar_job_start(job[0], job[1])
logger.info('Retrieving node list')
nodes = get_oar_job_nodes(job[0], job[1])
## Retrieving the probes declared in Kwapi
# The per-node check against this list is done in the stress loop below.
# The Kwapi host name comes from the -m/--kwapi-machine option (the original
# code referenced an undefined name `machine`).
probes_url = "http://" + args.kwapi_machine + "." + site + ".grid5000.fr:5000/probes/"
h1 = httplib2.Http()
resp, content = h1.request(probes_url, "GET")
content = loads(content)

# for node in nodes:
#     if site + '.' + node.address.split('.')[0] not in content['probes']['power']:
#         logger.warning('%s is not declared in Kwapi', node)
#         nodes.remove(node)

# Stressing hosts and retrieving power measures from Kwapi-API
#
# For every reserved node that has a power probe, `stress` is run on the node
# for `stress_time` seconds while the Kwapi API is polled about once per
# second. The values collected for every power probe during that window are
# stored in measures[<start timestamp>][<probe name>], and the node's own
# probe is then inspected to see whether it recorded the load.
probes_url = "http://" + args.kwapi_machine + "." + site + ".grid5000.fr:5000/probes/"
# Probes declared by the API in the initial request; kept separate from the
# polling replies so a failed poll cannot corrupt the "is declared" test.
power_probes = content['probes']['power']
measures = {}
i = 1
for node in sorted(nodes, key=lambda x: int(x.address.split('.')[0].split('-')[1])):
    node_name = node.address.split('.')[0]
    node_probe = site + '.' + node_name
    if node_probe not in power_probes:
        logger.warning('%s is not monitored energically', node)
        continue
    n_proc = get_host_attributes(node)['architecture']['smt_size']
    logger.info(str(i) + ' Host %s has %s cores', style.host(node.address), n_proc)
    ts = time()
    measures[ts] = {'stress': node.address}
    logger.info('Launching stress on ' + style.host(node.address))
    stress = SshProcess('killall stress; stress --cpu ' +
                        str(n_proc) + ' --timeout ' + str(stress_time),
                        node,
                        connection_params={'user': default_frontend_connection_params['user']})
    while True:
        resp, body = h1.request(probes_url, "GET")
        # Only decode the body of successful replies: an error page is not
        # guaranteed to be JSON.
        if resp['status'] == '200':
            for probe, values in loads(body)['probes']['power'].items():
                measures[ts].setdefault(probe, []).append(values['value'])
        # The process is started after the first poll, so the first sample
        # is taken before the load begins.
        if not stress.started:
            stress.start()
        if stress.ended:
            i += 1
            break
        sleep(1)
    logger.info('Stress ended, analyzing results')
    if node_probe in measures[ts]:
        try:
            measure = measures[ts][node_probe]
            first = measure[0]
            last = measure[-1]
            med = np.median(measure)
            logger.info('%s: %s %s %s', node_name, first, med, last)
            # NOTE(review): this accepts the probe when the LAST sample is
            # farther from the median than the FIRST one, which is the
            # opposite of the test used for the other probes below --
            # confirm the intended criterion.
            if abs(last - med) > abs(first - med):
                logger.info(style.user1('OK') + " " + node.address)
            else:
                logger.info('Looking for a variation in other probes')
                for probe in sorted(site + '.' + other.address.split('.')[0]
                                    for other in nodes):
                    # Nodes without a probe have no series; skip them
                    # instead of aborting the whole search on a KeyError.
                    if probe not in measures[ts]:
                        logger.debug('%s: no measure', probe)
                        continue
                    measure = measures[ts][probe]
                    first = measure[0]
                    last = measure[-1]
                    med = np.median(measure)
                    logger.debug('%s: %s %s %s', probe, first, med, last)
                    if abs(last - med) < abs(first - med) and last > first:
                        logger.error('Power from ' + node.address +
                                     ' is recorded by probe ' + probe)
                        logger.info(probe + ': ' + pformat(measure))
                        break
                else:
                    # Reached only when no probe matched (no break above).
                    logger.error('No variation found on any probes, wrong record in API ?')
        except Exception as e:
            # Best effort: a broken probe must not stop the remaining hosts.
            logger.error('problem with probe ' + node_name)
            print(str(e))
    else:
        logger.error('probe %s is not monitored', node_name)

    logger.info('Waiting for energy decrease')
    sleep(30)
# Network check
#
# For every reserved node, run an iperf client on the node against a local
# iperf server while polling the Kwapi API about once per second, then print
# the earliest and latest values recorded for the two probes of the link
# between the node and its neighbour in the topology graph.
#
# The star import is kept as is: it supplies g5k_graph and, presumably, the
# `nx` name used below -- confirm before narrowing it.
from execo_g5k.topology import *

probes_url = "http://" + args.kwapi_machine + "." + site + ".grid5000.fr:5000/probes/"

gr = g5k_graph()
for node in nodes:
    gr.add_host(node.address)

server = Process('iperf -s').start()

try:
    for node in sorted(nodes, key=lambda x: int(x.address.split('.')[0].split('-')[1])):
        logger.info('Checking host %s', style.host(node.address))
        # Both series map a measure timestamp to its value.
        net_in = {}
        net_out = {}
        node_name = node.address.split('.')[0]
        # The first neighbour of the host in the graph is treated as its switch.
        switch = list(nx.all_neighbors(gr, node.address))[0]
        switch_name = switch.split('.')[0]
        probe_in = site + '.' + switch_name + '_' + node_name
        probe_out = site + '.' + node_name + '_' + switch_name

        # NOTE(review): the client targets the bare site name; presumably it
        # resolves to the machine running the iperf server -- confirm.
        iperf = SshProcess('iperf -c ' + site + ' -r -t 20', node)
        while True:
            resp, body = h1.request(probes_url, "GET")
            # Only decode the body of successful replies.
            if resp['status'] == '200':
                # NOTE(review): both probes are read from 'network_in', as in
                # the original code; confirm whether probe_out should come
                # from a different section of the reply.
                network = loads(body)['probes']['network_in']
                if probe_in in network and probe_out in network:
                    data_in = network[probe_in]
                    data_out = network[probe_out]
                    net_in[data_in['timestamp']] = data_in['value']
                    net_out[data_out['timestamp']] = data_out['value']
            # Started after the first poll, so the first sample precedes the
            # traffic.
            if not iperf.started:
                iperf.start()
            if iperf.ended:
                break
            sleep(1)

        for series in (net_in, net_out):
            if not series:
                logger.error('No network measure retrieved for %s', node_name)
                continue
            # Dicts are unordered: pick first/last by timestamp explicitly.
            print('%s %s' % (series[min(series)], series[max(series)]))

finally:
    # Always stop the local iperf server, even if a check failed.
    server.kill()
    
    


 
## Retrieving post-mortem measurement
# h2 = httplib2.Http()
# resp, content = h2.request("http://energy." + site + 
#     ".grid5000.fr:12000/timeseries/?job_id=" + str(job[0]), "GET")
# content = loads(content) 
# 
# print content
