#!/usr/bin/env python

"""
esmond-ps-load-gridftp - process GridFTP logs and load into an Esmond MA.

See further documention in README.rst or use the --help flag for a full
list of arguments.
"""

# doesn't like script name
# pylint: disable=invalid-name

import calendar
import datetime
import json
import logging
import os
import pickle
import signal
import socket
import time

from optparse import OptionParser

from esmond_client.perfsonar.post import (
    EventTypeBulkPost,
    EventTypeBulkPostException,
    EventTypePost,
    EventTypePostException,
    MetadataPost,
    MetadataPostException,
)

# # # #
# Base classes, utility functions, etc
# # # #

# snippet courtesy of gist: https://gist.github.com/nonZero/2907502


class GracefulInterruptHandler(object):  # pylint: disable=too-few-public-methods
    """Context manager to handle sigint nicely."""
    # pylint: disable=attribute-defined-outside-init, unused-argument,
    # pylint: disable=missing-docstring, invalid-name, redefined-builtin
    def __init__(self, sig=signal.SIGINT):
        self.sig = sig

    def __enter__(self):
        self.interrupted = False
        self.released = False

        self.original_handler = signal.getsignal(self.sig)

        def handler(signum, frame):
            self.release()
            self.interrupted = True

        signal.signal(self.sig, handler)

        return self

    def __exit__(self, type, value, tb):
        self.release()

    def release(self):
        if self.released:
            return False

        signal.signal(self.sig, self.original_handler)

        self.released = True

        return True


def setup_log(log_path):
    """
    Usage:
    _log('main.start', 'happy simple log event')
    _log('launch', 'more={0}, complex={1} log=event'.format(100, 200))
    """
    # pylint: disable=redefined-variable-type
    log = logging.getLogger("esmond-ps-load-gridftp")
    if not log_path:
        handle = logging.StreamHandler()
    else:
        logfile = '{0}/esmond-ps-load-gridftp.log'.format(log_path)
        handle = logging.FileHandler(logfile)
    handle.setFormatter(logging.Formatter('ts=%(asctime)s %(message)s'))
    log.addHandler(handle)
    log.setLevel(logging.INFO)
    return log


class LogEntryBase(object):
    """
    Base class (a mixin really) for the log entry classes.
    """
    def to_pickle(self, fpath):
        """Dump internal payload to a pickle file."""
        fh = open(fpath, 'w')
        pickle.dump(self.to_dict(), fh)
        fh.close()

    def from_pickle(self, fpath):
        """Load internal payload from a file."""
        fh = open(fpath, 'r')
        dct = pickle.load(fh)
        fh.close()
        self.__dict__['_data'] = dct

    def to_dict(self):
        """Return internal payload as a dict."""
        # _data defined in subclasses that instantiate this mixin.
        # pylint: disable=no-member
        return self._data


FTP_CODES = {
    200: 'Command okay.',
    202: 'Command not implemented, superfluous at this site.',
    211: 'System status, or system help reply.',
    212: 'Directory status.',
    213: 'File status.',
    214: 'Help message.',
    215: 'NAME system type.',
    220: 'Service ready for new user.',
    221: 'Service closing control connection.',
    225: 'Data connection open; no transfer in progress.',
    226: 'Closing data connection.',
    227: 'Entering Passive Mode (h1,h2,h3,h4,p1,p2).',
    230: 'User logged in, proceed.',
    250: 'Requested file action okay, completed.',
    257: '"PATHNAME" created.',
    331: 'User name okay, need password.',
    332: 'Need account for login.',
    350: 'Requested file action pending further information.',
    421: 'Service not available, closing control connection.',
    425: 'Cant open data connection.',
    426: 'Connection closed; transfer aborted.',
    450: 'Requested file action not taken.',
    451: 'Requested action aborted: local error in processing.',
    452: 'Requested action not taken.',
    500: 'Syntax error, command unrecognized.',
    501: 'Syntax error in parameters or arguments.',
    502: 'Command not implemented.',
    503: 'Bad sequence of commands.',
    504: 'Command not implemented for that parameter.',
    530: 'Not logged in.',
    532: 'Need account for storing files.',
    550: 'Requested action not taken.',
    551: 'Requested action aborted: page type unknown.',
    552: 'Requested file action aborted.',
    553: 'Requested action not taken.',
}


def _convert_host(dest, hname, convert=True):
    """
    Convert hostname to a v4 or v6 ip address
    """

    # if host is in ip form, then skip
    if not convert:
        return hname

    version = None

    try:
        socket.inet_aton(dest)
        version = 4
    except socket.error:
        pass

    try:
        socket.inet_pton(socket.AF_INET6, dest)
        version = 6
    except socket.error:
        pass

    if version == 4:
        r = socket.getaddrinfo(hname, None)
    elif version == 6:
        r = socket.getaddrinfo(hname, None, socket.AF_INET6)
    else:
        r = None

    return r[0][4][0]


def _generate_metadata_args(host, dest, xfer_type, host_to_ip=True):
    """
    Generate the args for the MetadataPost depending on the
    xfer type - this is for the netlogger style log.
    """

    args = {'tool_name': 'gridftp', 'subject_type': 'point-to-point'}

    if xfer_type == 'RETR':
        args['source'] = _convert_host(dest, host, host_to_ip)
        args['destination'] = dest
        args['input_source'] = host
        args['input_destination'] = dest
    elif xfer_type == 'STOR':
        args['source'] = dest
        args['destination'] = _convert_host(dest, host, host_to_ip)
        args['input_source'] = dest
        args['input_destination'] = host

    args['measurement_agent'] = _convert_host(dest, host, host_to_ip)

    return args


def _epoch(dtime):
    """datetime -> epoch"""
    return calendar.timegm(dtime.utctimetuple())

# # # #
# Code/classes to handle the netlogger style logs
# # # #


class LogEntryDataObject(LogEntryBase):
    """
    Encapsulation object to handle a line of netlogger
    style GridFTP logs. Sanitizes the keys when need be.
    """
    def __init__(self, initial=None):
        self.__dict__['_data'] = {}

        if hasattr(initial, 'items'):
            self.__dict__['_data'] = initial
        elif isinstance(initial, list):
            for i in initial:
                k, v = i.split('=')
                k = k.lower().replace('.', '_')
                self.__setattr__(k, v)
        else:
            pass

    def __getattr__(self, name):
        val = self._data.get(name, None)
        if name in ['start', 'date'] and val is not None:
            return self._parse_date(val)
        if name in ['retrans']:
            if val is None:
                return []
            else:
                return [int(x) for x in val.split(',')]
        try:
            val = int(val)
        except (ValueError, TypeError):
            pass
        return val

    def __setattr__(self, name, value):
        self.__dict__['_data'][name] = value

    def _parse_date(self, dstring):  # pylint: disable=no-self-use
        return datetime.datetime.strptime(dstring, '%Y%m%d%H%M%S.%f')


def scan_and_load_netlogger(file_path, last_record, options, _log):  # pylint: disable=too-many-locals, too-many-branches, too-many-statements
    """
    Process the netlogger style logs.  If the metadata can not be
    created, the processing loop halts and returns None.
    """
    # Load the log

    with open(file_path, 'r') as fh:
        data = fh.read()
    data = data.split('\n')

    # Read up to the last record that was processed and start processing
    # subsequent records

    scanning = False

    obj = None
    count = 0

    with GracefulInterruptHandler() as handler:
        for row in data:
            row = row.strip()
            if not row:
                continue
            obj = LogEntryDataObject(row.split())
            if obj.type != 'RETR' and obj.type != 'STOR':
                continue
            if last_record and not scanning:
                if obj.to_dict() == last_record.to_dict():
                    scanning = True
                continue
            count += 1
            if options.progress:
                if count % 100 == 0:
                    _log('scan_and_load_netlogger.info', '{0} records processed'.format(count))
            try:
                dest = obj.dest.lstrip('[').rstrip(']')
                mda = _generate_metadata_args(obj.host, dest, obj.type)
            except Exception, ex:  # pylint: disable=broad-except
                _log('scan_and_load_netlogger.error',
                     'could not generate metadata args for row: {0} - exception: {1}'.format(
                         row, str(ex)))
                continue

            mpost = MetadataPost(
                options.api_url, username=options.user,
                api_key=options.key, script_alias=options.script_alias,
                **mda)
            mpost.add_event_type('throughput')
            mpost.add_event_type('streams-packet-retransmits')
            mpost.add_event_type('failures')
            # Additional/optional data
            mpost.add_freeform_key_value('bw-parallel-streams', obj.streams)
            mpost.add_freeform_key_value('bw-stripes', obj.stripes)
            mpost.add_freeform_key_value('gridftp-program', obj.prog)
            mpost.add_freeform_key_value('gridftp-block-size', obj.block)
            mpost.add_freeform_key_value('tcp-window-size', obj.buffer)
            mpost.add_freeform_key_value('gridftp-bytes-transferred', obj.nbytes)
            # Optional vars - these must be enabled via boolean
            # command line args since these values might be sensitive.
            if options.file_attr:
                mpost.add_freeform_key_value('gridftp-file', obj.file)
            if options.name_attr:
                mpost.add_freeform_key_value('gridftp-user', obj.user)
            if options.volume_attr:
                mpost.add_freeform_key_value('gridftp-volume', obj.volume)

            try:
                metadata = mpost.post_metadata()
            except MetadataPostException as ex:
                _log('scan_and_load_netlogger.error',
                     'MetadataPost failed, abort processing, not updating state - ERR: {0}'.format(
                         str(ex)))
                return None

            if obj.code == 226:
                # Set up bulk post object
                etb = EventTypeBulkPost(
                    options.api_url, username=options.user,
                    api_key=options.key, script_alias=options.script_alias,
                    metadata_key=metadata.metadata_key)
                # Calculate throughput
                try:
                    throughput = 8 * obj.nbytes / (obj.date - obj.start).total_seconds()
                    etb.add_data_point('throughput', _epoch(obj.start), throughput)
                except ZeroDivisionError:
                    # If there is a divide by zero issue calculating throughput
                    # (ie: same timestamps), then don't add the data point.
                    pass

                # Add retransmits
                etb.add_data_point('streams-packet-retransmits', _epoch(obj.start), obj.retrans)

                try:
                    etb.post_data()
                except EventTypeBulkPostException as ex:
                    _log('scan_and_load_json.warning', str(ex))
            else:
                etype = EventTypePost(
                    options.api_url, username=options.user,
                    api_key=options.key, script_alias=options.script_alias,
                    metadata_key=metadata.metadata_key,
                    event_type='failures')
                etype.add_data_point(
                    _epoch(obj.start),
                    {'error': '{0} {1}'.format(obj.code, FTP_CODES.get(obj.code, None))}
                )
                try:
                    etype.post_data()
                except EventTypePostException as ex:
                    _log('scan_and_load_netlogger.warning', str(ex))

            if options.single or handler.interrupted:
                if handler.interrupted:
                    _log('scan_and_load_netlogger.info', 'Got SIGINT - exiting.')
                break

    _log('scan_and_load_netlogger.end', 'Loaded {0} records'.format(count))

    return obj

# # # #
# Code/classes to handle the json style logs
# # # #


class JsonLogEntryDataObject(LogEntryBase):
    """
    Container for the "main" json log entries. Returns attributes
    or other wrapper containers.
    """
    def __init__(self, data={}):  # pylint: disable=dangerous-default-value
        self._data = data

        # "initialize" nested documents in the EntryDataObject containers
        # to ensure that this class' to_dict() method properly
        # return sanitized keys.  EntryDataObject._sanitize() changes
        # the internal dicts, not merely changes the keys on the way out.
        for section in ('getrusage', 'iostat', 'mpstat'):
            getattr(self, section)
        for section in self.streams:
            # init property
            # pylint: disable=pointless-statement
            section.tcpinfo

    # attributes from the "top level/main" json doc
    # these all don't need docstrings
    # pylint: disable=missing-docstring

    @property
    def cmd_type(self):
        return self._data.get('cmd_type')

    @property
    def dest(self):
        return self._data.get('dest')

    @property
    def end_timestamp(self):
        return datetime.datetime.utcfromtimestamp(float(self._data.get('end_timestamp')))

    @property
    def event_type(self):
        return self._data.get('event_type')

    @property
    def file(self):
        return self._data('file')

    @property
    def globus_blocksize(self):
        return self._data.get('globus_blocksize')

    @property
    def host(self):
        # bulletproof against spurious newline in log
        return self._data.get('host').strip()

    @property
    def nbytes(self):
        return self._data.get('nbytes')

    @property
    def nstreams(self):
        return self._data.get('nstreams')

    @property
    def prog(self):
        return self._data.get('prog')

    @property
    def ret_code(self):
        return self._data.get('ret_code')

    @property
    def start_timestamp(self):
        return datetime.datetime.utcfromtimestamp(float(self._data.get('start_timestamp')))

    @property
    def tcp_bufsize(self):
        return self._data.get('tcp_bufsize')

    @property
    def transfer_id(self):
        return self._data.get('transferID')

    @property
    def user(self):
        return self._data.get('user')

    @property
    def volume(self):
        return self._data.get('volume')

    # "nested/richer" document components

    @property
    def getrusage(self):
        return EntryDataObject(self._data.get('getrusage'))

    @property
    def iostat(self):
        return EntryDataObject(self._data.get('iostat'))

    @property
    def mpstat(self):
        return EntryDataObject(self._data.get('mpstat'))

    @property
    def streams(self):
        """Return a list of stream objects - favor getting filtered data with
        the other properties."""
        return [JsonLogEntryStream(x) for x in self._data.get('streams', [])]


class JsonLogEntryStream(object):
    """
    Wrapper for the entries in the streams array.
    """
    def __init__(self, data):
        self._data = data

    # props, no docstrings
    # pylint: disable=missing-docstring

    @property
    def stream(self):
        return self._data.get('stream')

    @property
    def stripe(self):
        return self._data.get('stripe')

    @property
    def tcpinfo(self):
        return EntryDataObject(self._data.get('TCPinfo'))

    def to_dict(self):
        return self._data


class EntryDataObject(object):
    """
    Wrapper for the actual data values in the iostat, getrusage,
    mpstat and tcpinfo dicts.  Typical encapsulation object and
    also _sanitize() the keys of the incoming dicts.
    """
    def __init__(self, initial=None):
        self.__dict__['_data'] = {}

        if hasattr(initial, 'items'):
            self.__dict__['_data'] = initial

            for k in self._data.keys():
                if k != self._sanitize(k):
                    self._data[self._sanitize(k)] = self._data.pop(k)

    def _sanitize(self, sstr):  # pylint: disable=no-self-use
        """
        Sanitize the keys of the incoming data.
        Change '/' -> '_' and '%' -> ''
        """
        return sstr.lower().replace('/', '_').replace('%', '')

    def __getattr__(self, name):
        return self._data.get(name, None)

    def __setattr__(self, name, value):
        self.__dict__['_data'][name] = value

    def __str__(self):
        msg = ''
        for k, v in self._data.items():
            msg += ' {0} : {1}\n'.format(k, v)
        return 'Contains: {0}'.format(msg)

    def get_members(self):
        """Get the members/keys from the payload"""
        for k in self._data.keys():
            yield k

    def to_dict(self):
        """Return payload as dict."""
        return self._data


def scan_and_load_json(file_path, last_record, options, _log):  # pylint: disable=too-many-locals, too-many-branches, too-many-statements
    """
    Process the json style logs.  If the metadata can not be
    created, the processing loop halts and returns None.
    """
    # suck up the log
    with open(file_path, 'r') as fh:
        data = fh.read()
    data = data.split('\n')

    # Read up to the last record that was processed and start processing
    # subsequent records

    scanning = False

    obj = None
    count = 0

    with GracefulInterruptHandler() as handler:
        for row in data:
            row = row.strip()
            if not row:
                continue
            try:
                obj = JsonLogEntryDataObject(json.loads(row))
            except ValueError:
                _log('scan_and_load_json.error',
                     'skipping - log line is not valid json: {0}'.format(row))
                continue

            if obj.event_type != 'Transfer-End':
                continue

            if last_record and not scanning:
                if obj.to_dict() == last_record.to_dict():
                    scanning = True
                continue

            count += 1
            if options.progress:
                if count % 100 == 0:
                    _log('scan_and_load_json.info', '{0} records processed'.format(count))

            try:
                mda = _generate_metadata_args(obj.host, obj.dest, obj.cmd_type, host_to_ip=False)
            except Exception, ex:  # pylint: disable=broad-except
                _log('scan_and_load_json.error',
                     'could not generate metadata args for row: {0} - exception: {1}'.format(
                         row, str(ex)))
                continue

            mpost = MetadataPost(
                options.api_url, username=options.user,
                api_key=options.key, script_alias=options.script_alias,
                **mda)
            mpost.add_event_type('throughput')
            mpost.add_event_type('streams-packet-retransmits')
            # Additional/optional data
            mpost.add_freeform_key_value('bw-parallel-streams', obj.nstreams)
            # find largest stripe in streams array
            stripes = 0
            for s in obj.streams:
                if s.stripe > stripes:
                    stripes = s.stripe
            mpost.add_freeform_key_value('bw-stripes', stripes+1)
            mpost.add_freeform_key_value('gridftp-program', obj.prog)
            mpost.add_freeform_key_value('gridftp-block-size', obj.globus_blocksize)
            mpost.add_freeform_key_value('tcp-window-size', obj.tcp_bufsize)
            mpost.add_freeform_key_value('gridftp-bytes-transferred', obj.nbytes)
            # Optional vars - these must be enabled via boolean
            # command line args since these values might be sensitive.
            if options.file_attr:
                mpost.add_freeform_key_value('gridftp-file', obj.file)
            if options.name_attr:
                mpost.add_freeform_key_value('gridftp-user', obj.user)
            if options.volume_attr:
                mpost.add_freeform_key_value('gridftp-volume', obj.volume)

            try:
                metadata = mpost.post_metadata()
            except MetadataPostException as ex:
                _log('scan_and_load_json.error',
                     'MetadataPost failed, abort processing, not updating state - ERR: {0}'.format(
                         str(ex)))
                return None

            # Set up event type bulk object
            etb = EventTypeBulkPost(
                options.api_url, username=options.user,
                api_key=options.key, script_alias=options.script_alias,
                metadata_key=metadata.metadata_key)

            # Calculate throughput
            try:
                throughput = 8 * obj.nbytes / (obj.end_timestamp -
                                               obj.start_timestamp).total_seconds()
                etb.add_data_point('throughput', _epoch(obj.start_timestamp), throughput)
            except ZeroDivisionError:
                # If there is a divide by zero issue calculating throughput
                # (ie: same timestamps), then don't add the data point.
                pass

            # # Calculate retransmits - a list of integers in this case
            retrans = list()
            for stream in obj.streams:
                retrans.append(stream.tcpinfo.total_retrans)

            etb.add_data_point('streams-packet-retransmits', _epoch(obj.start_timestamp), retrans)

            # # these event types are added wholesale as json blobs.
            et_map = {
                'iostat': obj.iostat.to_dict(),
                'mpstat': obj.mpstat.to_dict(),
                'rusage': obj.getrusage.to_dict(),
                'streams-tcpinfo': [x.to_dict() for x in obj.streams]
            }

            for k, v in et_map.items():
                etb.add_data_point(k, _epoch(obj.start_timestamp), v)

            try:
                etb.post_data()
            except EventTypeBulkPostException as ex:
                _log('scan_and_load_json.warning', str(ex))

            if options.single or handler.interrupted:
                if handler.interrupted:
                    _log('scan_and_load_netlogger.info', 'Got SIGINT - exiting.')
                break

    _log('scan_and_load_json.end', 'Loaded {0} records'.format(count))

    return obj

# # # #
# code to handle "standard vs. json" stuff for the main() code block
# # # #


def get_pickle_path(options):
    """
    Hold two default pickle file names depending on what kind of
    log we are processing. Having a single default in OptionParser or
    requiring a manual arg will doubtlessly cause problems.
    """
    json_or_not = {
        False: './load_grid_ftp.pickle',
        True: './load_grid_ftp.json.pickle',
    }

    if options.pickle:
        return os.path.normpath(options.pickle)
    else:
        return os.path.normpath(json_or_not.get(options.json, False))


def get_log_entry_container(options, log_line=None):
    """
    Return the appropriate kind of log entry container class to
    logic in main() that doesn't need to care which type.
    """
    json_or_not = {
        False: LogEntryDataObject,
        True: JsonLogEntryDataObject,
    }

    def init_entry(options):
        """Init from log line when re-starting."""
        if not options.json:
            return log_line.split()
        else:
            try:
                return json.loads(log_line)
            except ValueError:
                # bulletproof against log errors - only gets called by the
                # code that looks for the last processed line, so an
                # empty dict will do the trick.
                return {}

    if not log_line:
        # return an "empty" instance
        return json_or_not.get(options.json, False)()
    else:
        return json_or_not.get(options.json, False)(init_entry(options))


def scan_and_load(file_path, last_record, options, _log):
    """
    This is an entry point called by main() to dispatch to the
    appropriate file format handler.
    """
    if not options.json:
        return scan_and_load_netlogger(file_path, last_record, options, _log)
    else:
        return scan_and_load_json(file_path, last_record, options, _log)


def main():  # pylint: disable=too-many-locals, too-many-branches
    """Parse args and process the logs."""
    # pylint: disable=line-too-long
    usage = '%prog [ -f filename | -v ]'
    parser = OptionParser(usage=usage)
    parser.add_option('-f', '--file', metavar='FILE',
                      type='string', dest='filename',
                      help='Input filename.')
    parser.add_option('-p', '--pickle_file', metavar='FILE',
                      type='string', dest='pickle', default='',
                      help='Path to pickle file (./load_grid_ftp.pickle or ./load_grid_ftp.json.pickle).')
    parser.add_option('-d', '--dont_write',
                      dest='write', action='store_false', default=True,
                      help='Do not write last position pickle file - can be used to process multiple files by hand, development, etc.')
    parser.add_option('-S', '--single',
                      dest='single', action='store_true', default=False,
                      help='Only load a single record - used for development.')
    parser.add_option('-D', '--delete_state',
                      dest='delete_state', action='store_true', default=False,
                      help='Delete state file from disc after concluding run.')
    parser.add_option('-l', '--log_dir', metavar='DIR',
                      type='string', dest='logdir', default='',
                      help='Write log output to specified directory - if not set, log goes to stdout.')
    parser.add_option('-U', '--url', metavar='ESMOND_REST_URL',
                      type='string', dest='api_url',
                      help='URL for the REST API (default=%default) - required.',
                      default='http://localhost:8000')
    parser.add_option('-u', '--user', metavar='USER',
                      type='string', dest='user', default='',
                      help='POST interface username.')
    parser.add_option('-k', '--key', metavar='API_KEY',
                      type='string', dest='key', default='',
                      help='API key for POST operation.')
    parser.add_option('-s', '--script_alias', metavar='URI_PREFIX',
                      type='string', dest='script_alias', default='/',
                      help='Set the script_alias arg if the perfsonar API is configured to use one (default=%default which means none set).')
    parser.add_option('-F', '--file_attr',
                      dest='file_attr', action='store_true', default=False,
                      help='Include the gridftp file information when sending data to esmond (default=%default since this might be sensitive data).')
    parser.add_option('-N', '--name_attr',
                      dest='name_attr', action='store_true', default=False,
                      help='Include the gridftp user (name) information when sending data to esmond (default=%default since this might be sensitive data).')
    parser.add_option('-V', '--volume_attr',
                      dest='volume_attr', action='store_true', default=False,
                      help='Include the gridftp volume information when sending data to esmond (default=%default since this might be sensitive data).')
    parser.add_option('-v', '--verbose',
                      dest='verbose', action='count', default=False,
                      help='Verbose output - -v, -vv, etc.')
    parser.add_option('-P', '--no-progress',
                      dest='progress', action='store_false', default=True,
                      help='Suppress processing progress messages to console (default: on).')
    parser.add_option('-J', '--json',
                      dest='json', action='store_true', default=False,
                      help='Read JSON formatted GridFTP logs.')
    options, _ = parser.parse_args()

    log_path = None

    if options.logdir:
        log_path = os.path.normpath(options.logdir)
        if not os.path.exists(log_path):
            parser.error('{0} log path does not exist.'.format(log_path))

    log = setup_log(log_path)

    def _log(event, msg):
        log.info('event=%s id=%s %s', event, int(time.time()), msg)

    if not options.filename:
        parser.error('Filename is required.')

    file_path = os.path.normpath(options.filename)

    if not os.path.exists(file_path):
        parser.error('{f} does not exist'.format(f=file_path))

    # Check for previously saved state file

    pickle_path = get_pickle_path(options)

    last_record = None

    if os.path.exists(pickle_path):
        last_record = get_log_entry_container(options)
        last_record.from_pickle(pickle_path)
        _log('main.start', 'found last record: {0}'.format(last_record.to_dict()))
    else:
        _log('main.start', 'no last record found')

    # See if the currently indicated log contains the last record -
    # primarily a check to see if the log has been rotated and we
    # need to look around for our last spot.

    last_record_check = False

    if last_record:
        with open(file_path, 'r') as fh:
            data = fh.read()
        data = data.split('\n')
        for row in data:
            row = row.strip()
            if not row:
                continue
            obj = get_log_entry_container(options, row)
            if obj.to_dict() == last_record.to_dict():
                last_record_check = True
                break

    # Process the file
    if not last_record:
        # Probably a fresh run or manual loads with --dont_write, just do it.
        _log('main.process', 'No last record, processing {0}'.format(file_path))
        last_log_entry = scan_and_load(file_path, last_record, options, _log)
    elif last_record and last_record_check:
        _log('main.process', 'File {0} passes last record check'.format(file_path))
        # We have a hit in the curent log so proceed.
        last_log_entry = scan_and_load(file_path, last_record, options, _log)
    else:
        # State not found so log a warning and assume rotation
        _log('main.warn',
             'File {0} does not contain last log entry. Maybe rotated?- proceeding'.format(
                 file_path))
        last_record = None
        last_log_entry = scan_and_load(file_path, last_record, options, _log)

    if last_log_entry and options.write:
        last_log_entry.to_pickle(pickle_path)

    if options.delete_state:
        os.unlink(pickle_path)


if __name__ == '__main__':
    main()
