#!/usr/bin/env python
# Copyright European Organization for Nuclear Research (CERN) 2013
#
# Licensed under the Apache License, Version 2.0 (the "License");
# You may not use this file except in compliance with the License.
# You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
#
# Authors:
# - Ralph Vigne <ralph.vigne@cern.ch> 2015
# - Cedric Serfon, <cedric.serfon@cern.ch> 2017

'''
Probe arbitrary Graphite time series against a provided threshold. Examples:

1) Check how many minutes in the last half hour had an average submit time higher than 5 seconds.
    Command: python tools/probes/common/graphite2nagios --target aliasByNode(removeBelowValue(stats.timers.rucio.transfertool.fts3.submit_transfer.*.mean,5000),6) --critical 25 --from 30 --warning 20
    Explained:
        *) Average per minute is indicated by using the mean - attribute at the end of the target metric. Other options are count, count_ps, lower, upper, and sum
        *) The example above uses removeBelowValue to filter data points below the intended threshold, which is 5 seconds. Other options are: removeAboveValue, removeAbovePercentile, removeBelowPercentile
    Note the sliding time frame, thus messages can be sent multiple times.

2) Check if the current availability of the load balancer is above 20%
    Command: python tools/probes/common/graphite2nagios --target aliasByNode(removeAboveValue(stats.rucio.monitoring.loadbalancer.rucio-lb-prod..Idle_pct,20),4) --critical 1 --from 1
    => Checks whether the availability of the load balancer is below 20 percent, and sends a critical if so. Must be executed every minute.

More information on supported functions can be found at: http://graphite.readthedocs.org/en/latest/functions.html
'''

import logging
import pprint
import sys

import requests

from optparse import OptionParser

# Exit codes handed to sys.exit() by the probe; the names follow the Nagios plugin states.
OK, WARNING, CRITICAL, UNKNOWN = 0, 1, 2, 3
# Host name of the Graphite server. Despite the name this is only the host part: the scheme and
# the '/render' path are added where the request and details URLs are built.
GRAPHITE_URL = "rucio-graphite-prod-02.cern.ch"


def parse_options(args=None):
    """
    Parse and validate the command line options of the probe.

    The numeric options (--critical, --from, --warning) are declared as integers, so a
    non-numeric value is rejected by optparse right away instead of failing later.

    :param args: Optional list of argument strings to parse. When None, optparse
                 falls back to sys.argv[1:].
    :returns: The optparse values object with the attributes target (str), critical (int),
              period (int, minutes, defaults to 60), warning (int, defaults to the value
              of critical) and debug (bool).

    Terminates the process through parser.error() when a mandatory option is missing
    or a numeric option cannot be parsed.
    """
    parser = OptionParser()
    parser.add_option('-t', '--target', help='Graphite metric to be validated', action='store', dest='target')
    parser.add_option('-c', '--critical', help='Threshold for number of data points triggering a critical message.', action='store', type='int', dest='critical')
    parser.add_option('-f', '--from', help='Optional: Time period of data to be requested in minutes e.g. for last 30 minutes = 30', action='store', type='int', dest='period')
    parser.add_option('-w', '--warning', help='Optional: Threshold for number of data points triggering a warning message.', action='store', type='int', dest='warning')
    parser.add_option('-v', '--verbose', help='Optional: For the curious.', action='store_true', dest='debug', default=False)
    options, _ = parser.parse_args(args)

    # Checking mandatory arguments
    if not options.target:
        parser.error('Target metric not specified. E.g. stats.timers.rucio.transfertool.fts3.submit_transfer.*.mean')
    # Compare against None: the values are integers now and an explicit 0 must stay a valid threshold
    if options.critical is None:
        parser.error('No threshold value for critical defined, but mandatory. E.g. 10')

    # Logging has to be configured before the debug messages about defaults are emitted
    log_level = logging.DEBUG if options.debug else logging.ERROR
    logging.basicConfig(stream=sys.stdout, level=log_level)

    # Filling in defaults for the optional arguments
    if options.period is None:
        logging.debug('No timer period provided. Using the last 60 minutes as default period')
        options.period = 60
    if options.warning is None:
        logging.debug('No threshold for warning message provided. Warnings will not be printed.')
        # With warning == critical no data point count can fall between the two thresholds
        options.warning = options.critical
    return options


# Seconds to wait for Graphite before giving up, so an unresponsive server cannot block the probe forever
REQUEST_TIMEOUT = 30


def count_incidents(datapoints):
    """
    Count the datapoints of a series that carry a value.

    :param datapoints: Iterable of datapoints as found under the 'datapoints' key of a
                       returned series. The first element of each datapoint is taken as
                       its value (presumably [value, timestamp] pairs - confirm against
                       the Graphite render API).
    :returns: Number of datapoints whose first element is not None.
    """
    return sum(1 for point in datapoints if point[0] is not None)


def main():
    """
    Request the target from Graphite and compare the number of non-null datapoints of
    every returned series against the warning and critical thresholds.

    :returns: The exit code of the probe: OK, WARNING or CRITICAL depending on the
              thresholds, or UNKNOWN when the options are not numeric or Graphite could
              not be queried successfully.
    """
    options = parse_options()
    try:
        period = int(options.period)
        warning = int(options.warning)
        critical = int(options.critical)
    except ValueError as error:
        print('Invalid numeric option: %s' % error)
        return UNKNOWN

    url = 'http://%s/render' % GRAPHITE_URL
    # The query is handed over as params so that requests URL-encodes the target expression
    params = {'format': 'json',
              'from': '-%smin' % (period + 1),  # Adding 1 minute to avoid null values in case of "in the last minute" requests
              'target': options.target}
    logging.debug('Requesting metric: %s with parameters %s' % (url, params))
    try:
        req = requests.get(url, params=params, timeout=REQUEST_TIMEOUT)
    except requests.exceptions.RequestException as error:
        print('Failed requesting data from Graphite with: %s' % error)
        return UNKNOWN
    if req.status_code != 200:
        # Without data no statement about the metric is possible, hence UNKNOWN instead of a traceback
        print('Failed requesting data from Graphite with: %s. Use --verbose for details.' % req.status_code)
        return UNKNOWN
    try:
        series_list = req.json()
    except ValueError as error:
        print('Graphite returned a response that is not valid JSON: %s' % error)
        return UNKNOWN
    if options.debug:
        pprint.PrettyPrinter(indent=4).pprint(series_list)

    exit_code = OK
    for series in series_list:
        counter = count_incidents(series['datapoints'])
        # NOTE(review): critical triggers at counter >= critical while warning needs counter > warning;
        # this asymmetry is kept as found - confirm that it is intended.
        if critical <= counter:
            print('%s INCIDENTS %s INTERVAL %s min.' % (series['target'], counter, options.period))
            exit_code = CRITICAL
        elif warning < counter:
            print('%s INCIDENTS %s INTERVAL %s min.' % (series['target'], counter, options.period))
            # Never downgrade a CRITICAL raised by an earlier series
            if exit_code == OK:
                exit_code = WARNING
        logging.debug('%s incidents observed for target %s.' % (counter, series['target']))
    if exit_code != OK:
        print('Details: https://%s/render?from=-%smin&target=%s' % (GRAPHITE_URL, int((period * 1.2) + 1), options.target))  # Adding 20% of time to make plot more comprehensive
    return exit_code


if __name__ == "__main__":
    sys.exit(main())
