#! /usr/bin/python3
import re, sys
from getopt import gnu_getopt, GetoptError
from subprocess import Popen, PIPE

# Line classifiers applied by check_url() to each line of the `lynx -dump`
# output.  They are tried in the order error, warning, inform, ignore; a line
# matching none of them is reported as unrecognised.

# Status texts that mark a pool line as an error.
pool_error_messages = [
    r'At least one drive broken in raidset, machine unstable',
    r'Pool read-only: Internal repository error',
]

# Error lines.  Only the second alternative defines the `pool` group (the
# first token of the line); for the first alternative the group is None.
error_re = re.compile('|'.join([
    r'.*\bFile could not be opened\b',
    r'\s*(?P<pool>[a-zA-Z0-9_-]+)\s+[a-zA-Z0-9_-]+\s+\[\d+\]\s+(' \
        + '|'.join(pool_error_messages) + ')',
]))

# Warning lines.  The first token is captured as `pool`, mirroring error_re,
# so that check_url() can name the affected pools; the set of lines matched
# is the same as with the former `([a-zA-Z0-9_-]+\s+){2}` prefix.
warning_re = re.compile('|'.join([
    r'\s*(?P<pool>[a-zA-Z0-9_-]+)\s+[a-zA-Z0-9_-]+\s+\[\d+\]\s+'
    r'(Operator intervention)\s*$',
]))

# Informational lines: echoed in the output but never change the status.
# NOTE(review): the dots in `Loading...` are unescaped, so they match any
# three characters -- presumably a literal "..." was intended; confirm
# before tightening.
inform_re = re.compile('|'.join([
    r'\s*([a-zA-Z0-9_-]+\s+){2}\[\d+\]\s+tape_maintenance\s*$',
    r'\s*([a-zA-Z0-9_-]+\s+){2}\[\d+\]\s+(Initializing|Loading...)\s*$',
]))

# Lines that are silently skipped (page furniture, table headers, plain
# numeric rows, the link reference list, blank lines).
ignore_re = re.compile('|'.join([
    r'\s+$',
    r'\s+\[1\]\[eagleredtrans.gif\]$',
    r'\s+Birds Home$',
    r'\s+\[eagle-grey\.gif\]$',
    r'Pool Property Tables$',
    r'Pool Views$',
    r'\s+\[2\]PoolManager$',
    r'Pool groups of PoolManager$',
    r'\s+PoolGroup\s+Total Space/MB',
    r'\s+\[\d+\][a-zA-Z0-9_]+\s+\d+\s+\d+\s+\d+$',
    r'Pool group [a-zA-Z0-9_]+$',
    r'\s+\[\d+\]Cell View \[\d+\]Space Usage',
    r'\s+CellName\s+DomainName\s+Total Space/MB',
    r'\s+[a-zA-Z0-9_]+\s+[-a-zA-Z0-9_]+\s+\d+\s+\d+\s+\d+$',
    r'\s+_+$',
    r'\s+diskCacheV\d+\.services\.web\.PoolInfoObserverEngine',
    r'References$',
    r'\s+\d+\. http://',
]))

def _matched_pool(mo):
    """Return the text captured by the ``pool`` group of *mo*, or None.

    None is returned both when the pattern defines no ``pool`` group and
    when the group exists but did not take part in the match.
    """
    try:
        return mo.group('pool')
    except IndexError:
        return None


def check_url(url):
    """Dump *url* with lynx, classify every output line and exit.

    Each line is tested against error_re, warning_re, inform_re and
    ignore_re, in that order; lines matching none of them count as
    unrecognised.  A one-line summary is printed, followed by the
    classified lines prefixed with ``E``, ``W``, ``N`` or ``U``.

    This function does not return: it always ends in ``sys.exit`` with
    2 if errors were found, 1 if only warnings were found, 3 if there
    were unrecognised lines, lynx could not be started or fewer than
    20 lines were produced, and 0 otherwise.  Unrecognised lines take
    precedence over errors and warnings because the highest code wins.
    """
    cmd = ['lynx', '-width=512', '-dump', url]
    msgs = []
    error_pools = []
    warning_pools = []
    line_count = 0
    error_count = 0
    warning_count = 0
    unknown_count = 0

    try:
        proc = Popen(cmd, shell=False, stdout=PIPE)
    except OSError as xc:
        # lynx missing or not executable: report it instead of a traceback.
        print('Cannot run %s: %s' % (' '.join(cmd), xc))
        sys.exit(3)

    # The context manager closes the pipe and waits for lynx to terminate.
    with proc:
        for raw in proc.stdout:
            # The pipe yields bytes; the patterns are str, so decode first.
            # Undecodable bytes are replaced rather than aborting the probe.
            ln = raw.decode('utf-8', 'replace')
            line_count += 1

            mo = error_re.match(ln)
            if mo:
                error_count += 1
                msgs.append('E ' + ln)
                pool = _matched_pool(mo)
                if pool is not None:
                    error_pools.append(pool)
                continue

            mo = warning_re.match(ln)
            if mo:
                warning_count += 1
                pool = _matched_pool(mo)
                if pool is not None:
                    warning_pools.append(pool)
                msgs.append('W ' + ln)
                continue

            if inform_re.match(ln):
                msgs.append('N ' + ln)
                continue

            if not ignore_re.match(ln):
                unknown_count += 1
                msgs.append('U ' + ln)

    issues = []
    status_code = 0
    if error_count:
        status_code = max(status_code, 2)
        if not error_pools:
            issues.append('Found %d errors.' % error_count)
        else:
            issues.append('Found %d errors affecting %s.'
                          % (error_count, ', '.join(error_pools)))
    if warning_count:
        status_code = max(status_code, 1)
        if not warning_pools:
            issues.append('Found %d warnings.' % warning_count)
        else:
            issues.append('Found %d warnings affecting %s.'
                          % (warning_count, ', '.join(warning_pools)))
    if unknown_count:
        issues.append('Some lines were not recognised, please update the probe.')
        status_code = max(status_code, 3)

    if status_code > 0:
        print(' '.join(issues))
    elif line_count < 20:
        print('The output is suspiciously short.')
        print('%s gives only %d lines of output.' % (' '.join(cmd), line_count))
        status_code = 3
    else:
        print('Scanned %d lines' % line_count)

    # Every message already ends with the newline of its source line.
    # writelines() is used because map() is lazy in Python 3 and would
    # never perform the writes.
    sys.stdout.writelines(msgs)
    sys.exit(status_code)

# Command-line handling.  Options:
#   -U URL    base URL of the web interface (overrides -H/-p)
#   -H HOST   host name, used with the port to build the base URL
#   -p PORT   port number used together with -H (default 2288)
#   -g GROUP  name inserted into the page path that is checked (mandatory)
# Any usage error is written to stderr and the script exits with status 3.
host = None
port = 2288
base_url = None
page_path = None
try:
    opts, args = gnu_getopt(sys.argv[1:], 'U:H:p:g:')
    for opt, arg in opts:
        if opt == '-U':
            base_url = arg
        elif opt == '-H':
            host = arg
        elif opt == '-p':
            port = arg
        elif opt == '-g':
            page_path = '/pools/list/PoolManager/%s/spaces' % arg
        else:
            # gnu_getopt only returns declared options, so this indicates
            # an option added to the spec above without a handler here.
            raise GetoptError('Unhandled option %s.' % opt)
    if base_url is None:
        if host is None:
            raise GetoptError('The -H or -U option is mandatory.')
        # The port arrives as a string from the command line; convert it
        # here so that the %d format below does not raise TypeError.
        try:
            port = int(port)
        except ValueError:
            raise GetoptError('The -p option requires a numeric port, got %r.'
                              % port)
        base_url = 'http://%s:%d' % (host, port)
    if page_path is None:
        raise GetoptError('The -g option is mandatory.')
except GetoptError as xc:
    sys.stderr.write('%s\n' % xc)
    sys.exit(3)
check_url(base_url + page_path)
