#!/usr/bin/python

# Copyright (C) 2013 Michael Coughlin
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

"""Earthquake xml file generator.

This script generates earthquake xml files using notices from the
internet and USGS PDL client.

Comments should be e-mailed to michael.coughlin@ligo.org.

"""

import os, sys, glob, optparse, warnings, time, json
import numpy as np
import subprocess
from subprocess import Popen
from lxml import etree
import pickle

import lal.gpstime

from seismon import (eqmon, utils)

import matplotlib
matplotlib.use("AGG")
matplotlib.rcParams.update({'font.size': 18})
from matplotlib import pyplot as plt
from matplotlib import cm

from sklearn import gaussian_process
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern, RBF, WhiteKernel, ConstantKernel
from sklearn import preprocessing
from sklearn import svm

__author__ = "Michael Coughlin <michael.coughlin@ligo.org>"
__version__ = 1.0
__date__    = "9/22/2013"

# =============================================================================
#
#                               DEFINITIONS
#
# =============================================================================

def parse_commandline():
    """@parse the options given on the command-line.
    """
    parser = optparse.OptionParser(usage=__doc__,version=__version__)

    parser.add_option("-i", "--inputFiles", help="Seismon files.")
    parser.add_option("-o", "--outputDirectory", help="output file.")
    parser.add_option("--doLockloss",  action="store_true", default=False)

    parser.add_option("-v", "--verbose", action="store_true", default=False,
                      help="Run verbosely. (Default: False)")

    opts, args = parser.parse_args()

    # show parameters
    if opts.verbose:
        print >> sys.stderr, ""
        print >> sys.stderr, "running network_eqmon..."
        print >> sys.stderr, "version: %s"%__version__
        print >> sys.stderr, ""
        print >> sys.stderr, "***************** PARAMETERS ********************"
        for o in opts.__dict__.items():
          print >> sys.stderr, o[0]+":"
          print >> sys.stderr, o[1]
        print >> sys.stderr, ""

    return opts

def params_struct(opts):
    """@create params structure

    @param opts
        command line options
    """

    params = {}
    params["inputFiles"] = opts.inputFiles
    params["outputDirectory"] = opts.outputDirectory

    return params

def predict_events():
    """@write pdl events

    @param params
        seismon params structure
    """

    warnings.filterwarnings("ignore")

    # Parse command line
    opts = parse_commandline()
    params = params_struct(opts)

    if not os.path.isdir(params["outputDirectory"]):
        utils.mkdir(params["outputDirectory"])
    outputFile = os.path.join(params["outputDirectory"],"earthquakes.txt")

    inputFiles = params["inputFiles"].split(",")
    fid = open(outputFile,'w')
    for inputFile in inputFiles:
        events = np.loadtxt(inputFile)
        for ii,event in enumerate(events):
            fid.write("%.1f %.1f %.1f %.1f %.1f %.1f %.1f %.5e %d %d %.1f %.1f %e %.1f %.1f %.1f %.1f %.1f %.1f %.1f %.1f %.1f %.1f %.1f %.1f %.5e %.1f %.5e %.1f %.5e %.1f %d\n"%(event[0],event[1],event[2],event[3],event[4],event[5],event[6],event[7],event[8],event[9],event[10],event[11],event[12],event[13],event[14],event[15],event[16],event[17],event[18],event[19],event[20],event[21],event[22],event[23],event[24],event[25],event[26],event[27],event[28],event[29],event[30],event[31]))
    fid.close()
    events = np.loadtxt(outputFile)

    idx = np.where(~np.isnan(np.log10(events[:,25])))[0]
    events = events[idx,:]

    global M, r, h, amp
    M = events[:,1]
    lat = events[:,10]
    lon = events[:,11]
    r = events[:,12]/1000.0
    h = events[:,13]
    az = events[:,14]
    amp = events[:,25]

    X = np.vstack((M,lat,lon,r,h,az)).T
    y = np.log10(amp)

    scaler = preprocessing.StandardScaler().fit(X)
    X = scaler.transform(X) 

    Nsamples, Nvars = X.shape
    N = 1000
    #N = 500
    Noise_level    =  0.01
    Xp = np.zeros((N,Nvars))
    yp = np.zeros((N,))
    for ii in xrange(N):
        idx = int(np.floor(np.random.rand()*Nsamples))
        rand_values = Noise_level*np.std(X,axis=0)*np.random.randn(Nvars,)
        rand_value = Noise_level*np.std(y)*np.random.randn()

        Xp[ii,:] = X[idx,:] + rand_values
        yp[ii] = y[idx] + rand_value

    #perm = np.random.permutation(len(y))
    #idx = perm[:int(float(len(y))/2.0)]
    #idy = perm[int(float(len(y))/2.0)+1:]

    #idx = perm.copy()
    #idy = perm.copy()

    #gp = gaussian_process.GaussianProcess(theta0=1e-2, thetaL=1e-4, thetaU=1e-1, nugget = 0.1)
    #pred, sigma_pred = gp.predict(X)

    # First run
    #kernel = RBF(length_scale=1.0, length_scale_bounds=(1e-6, 1e6))
    #kernel = 1.0 * RBF(length_scale=100.0, length_scale_bounds=(1e-6, 1e6)) \
    #+ WhiteKernel(noise_level=1, noise_level_bounds=(1e-10, 1e+10))
    #kernel = WhiteKernel(noise_level=0.0, noise_level_bounds=(-10.0, 10.0)) + ConstantKernel(constant_value=1.0, constant_value_bounds=(-1e3, 1e3)) + RBF(length_scale=1.0, length_scale_bounds=(-1e2, 1e2))
    #kernel = ConstantKernel(constant_value=1.0, constant_value_bounds=(1e-3, 1e3)) + RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e2))
    kernel = WhiteKernel(noise_level=1, noise_level_bounds=(1e-10, 1e+10)) + RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e2))
    #kernel = ConstantKernel() + Matern(length_scale=2, nu=3/2) + WhiteKernel(noise_level=1)

    #gp = GaussianProcessRegressor(kernel=kernel) #.fit(X, y)
    #gp.fit(X, y)  
    #pred, pred_std = gp.predict(X, return_std=True)

    gp = GaussianProcessRegressor(kernel=kernel,alpha = np.std(yp)) #.fit(X, y)
    gp.fit(Xp, yp)
    pred, pred_std = gp.predict(X, return_std=True)

    gpfile = os.path.join(params["outputDirectory"],'gp.pickle')
    with open(gpfile, 'wb') as handle:
        pickle.dump((scaler,gp), handle, protocol=pickle.HIGHEST_PROTOCOL)

    with open(gpfile, 'rb') as fid:
        scaler,gp = pickle.load(fid)

    #print gp._equation

    #events = events[idy,:]
    amp = events[:,25]
    pred = 10**pred
    pred_std = pred*np.log(10)*pred_std

    ratio1 = amp/pred
    ratio2 = pred/amp
    ratio = np.max(np.vstack((ratio1,ratio2)).T,axis=1)
    perc = float(len(np.where(ratio < 3.0)[0])) / float(len(ratio))

    print "%.5f of events within a factor of 3."%perc

    events[:,7] = pred
    pred_std = pred_std[events[:,7].argsort()]
    events = events[events[:,7].argsort()]

    fid = open(outputFile,'w')
    for ii,event in enumerate(events):
        fid.write("%.1f %.1f %.1f %.1f %.1f %.1f %.1f %.5e %d %d %.1f %.1f %e %.1f %.1f %.1f %.1f %.1f %.1f %.1f %.1f %.1f %.1f %.1f %.1f %.5e %.1f %.5e %.1f %.5e %.1f %d\n"%(event[0],event[1],event[2],event[3],event[4],event[5],event[6],event[7],event[8],event[9],event[10],event[11],event[12],event[13],event[14],event[15],event[16],event[17],event[18],event[19],event[20],event[21],event[22],event[23],event[24],event[25],event[26],event[27],event[28],event[29],event[30],event[31]))
    fid.close()
    events = np.loadtxt(outputFile)

    if opts.doLockloss:

        indexes1 = np.where(events[:,31] == 1)[0]
        indexes2 = np.where(events[:,31] == 2)[0]
        indexes = np.hstack((indexes1,indexes2))
        indexes3 = np.where(events[:,25] >= 0.0)[0]
        indexes = np.intersect1d(indexes,indexes3)

        M = events[indexes,1]
        lat = events[indexes,10]
        lon = events[indexes,11]
        r = events[indexes,12]/1000.0
        h = events[indexes,13]
        az = events[indexes,14]
        pred = events[indexes,7]

        X = np.vstack((M,lat,lon,r,h,az,np.log10(pred))).T
        y = events[:,31]
        y[y == 1] = 0
        y[y == 2] = 1
        y = y[indexes]

        scaler = preprocessing.StandardScaler().fit(X)
        X = scaler.transform(X)

        Nsamples, Nvars = X.shape
        N = 10000
        #N = 500
        Noise_level    =  0.01
        Xp = np.zeros((N,Nvars))
        yp = np.zeros((N,))
        for ii in xrange(N):
            idx = int(np.floor(np.random.rand()*Nsamples))
            rand_values = Noise_level*np.std(X,axis=0)*np.random.randn(Nvars,)

            Xp[ii,:] = X[idx,:] + rand_values
            yp[ii] = int(y[idx])

        C = 1.0  # SVM regularization parameter
        clf = svm.SVC(kernel='rbf', gamma='auto', C=C, probability=True)
        clf.fit(Xp, yp)

        svmfile = os.path.join(params["outputDirectory"],'svm.pickle')
        with open(svmfile, 'wb') as handle:
            pickle.dump((scaler,clf), handle, protocol=pickle.HIGHEST_PROTOCOL)

        with open(svmfile, 'rb') as fid:
            scaler,clf = pickle.load(fid)

        lockloss_prediction = clf.predict(X)
        lockloss_prob = clf.predict_proba(X)
        lockloss_prob = lockloss_prob[:,1]

        idx = np.where(lockloss_prediction == y)[0]
        perc = float(len(np.where(lockloss_prediction == y)[0])) / float(len(y))
        print "%.5f of events with lockloss predicted correctly."%perc

        indlockloss = np.arange(len(y))

    ind = np.arange(len(events[:,7]))
    vmin = 5.0
    vmax = 7.0

    plt.figure()
    ax = plt.gca()

    yerr = np.zeros((2,len(events)))
    yerr[0,:] = events[:,7]/3.0
    yerr[1,:] = events[:,7]*3.0
    #plt.errorbar(ind,events[:,7],yerr=yerr,label='Predicted')
    #plt.plot(ind,events[:,7],'gray',label='Predicted')
    plt.errorbar(ind,events[:,7],pred_std,label='Predicted')
    plt.fill_between(ind, yerr[0,:], yerr[1,:], facecolor='gray')

    sc = ax.scatter(ind,events[:,25],s=20,c=events[:,1],vmin=vmin, vmax=vmax,label='Measured')
    ax.set_yscale('log')
    #plt.semilogy(events[:,15],'kx',label='Measured')
    #plt.semilogy(events[:,7],'c*',label='Predicted')
    plt.legend(loc='best',numpoints=1)
    plt.xlabel("Event number")
    plt.ylabel("Ground Velocity [m/s]")
    plt.xlim([-0.5,len(events[:,0])+0.5])
    plt.ylim([1e-8,1e-3])
    cbar = plt.colorbar(sc)
    cbar.set_label("Earthquake Magnitude")
    plt.show()
    plotName = os.path.join(params["outputDirectory"],'prediction.png')
    plt.savefig(plotName)
    plotName = os.path.join(params["outputDirectory"],'prediction.eps')
    plt.savefig(plotName)
    plotName = os.path.join(params["outputDirectory"],'prediction.pdf')
    plt.savefig(plotName)
    plt.close()

    if opts.doLockloss:
        plt.figure()
        ax = plt.gca()
        ax.plot(indlockloss,y,'ko')
        #ax.plot(indlockloss,lockloss_prob,'bo')
        indexes = np.where(lockloss_prediction == y)[0]
        sc = ax.scatter(indlockloss[indexes],lockloss_prob[indexes],s=20,c='g',marker='+')
        indexes = np.setdiff1d(indlockloss,indexes)
        sc = ax.scatter(indlockloss[indexes],lockloss_prob[indexes],s=20,c='r',marker='x')
    
        plt.xlabel("Event Number")
        plt.ylabel("Lockloss Probability")
        plt.xlim([-0.5,len(indlockloss)+0.5])
        plt.ylim([-0.5,1.5])
        plt.show()
        plotName = os.path.join(params["outputDirectory"],'lockloss_prediction.png')
        plt.savefig(plotName)
        plotName = os.path.join(params["outputDirectory"],'lockloss_prediction.eps')
        plt.savefig(plotName)
        plotName = os.path.join(params["outputDirectory"],'lockloss_prediction.pdf')
        plt.savefig(plotName)
        plt.close()

        plt.figure()
        ax = plt.gca()

        bins = np.linspace(0,1,50)
        indexes = np.where(y == 0)[0]
        hist1, bin_edges = np.histogram(lockloss_prob[indexes], bins=bins)
        hist1 = hist1 / float(np.sum(hist1))
        indexes = np.setdiff1d(indlockloss,indexes)
        hist2, bin_edges = np.histogram(lockloss_prob[indexes], bins=bins)
        hist2 = hist2 / float(np.sum(hist2))
        bins = (bins[1:] + bins[:-1])/2.0

        plt.plot(bins,hist1,'r',label="No lockloss")
        plt.plot(bins,hist2,'k--',label="Lockloss")

        plt.legend(loc='best',numpoints=1)

        plt.ylabel("Probability Density Function")
        plt.xlabel("Lockloss Probability")
        #plt.xlim([-0.5,len(indlockloss)+0.5])
        #plt.ylim([-0.5,1.5])
        plt.show()
        plotName = os.path.join(params["outputDirectory"],'lockloss_histogram.png')
        plt.savefig(plotName)
        plotName = os.path.join(params["outputDirectory"],'lockloss_histogram.eps')
        plt.savefig(plotName)
        plotName = os.path.join(params["outputDirectory"],'lockloss_histogram.pdf')
        plt.savefig(plotName)
        plt.close()

    vel = (events[:,12]/1000.0)/(events[:,24]-events[:,0])

    distances = np.linspace(0,100000,1000)
    times = distances / 3.5
    plt.figure()
    ax = plt.gca()
    ax.plot(events[:,7],vel,'kx')
    ax.plot([1e-7,1e-3],[3.5,3.5],'k--')
    #ax.set_yscale('log')
    ax.set_xscale('log')
    plt.xlabel("Ground velocity [m/s]")
    plt.ylabel("Earthquake velocity [m/s]")
    plt.xlim([1e-8,1e-3])
    plt.ylim([2,5])
    plt.show()
    plotName = os.path.join(params["outputDirectory"],'velocity.png')
    plt.savefig(plotName)
    plotName = os.path.join(params["outputDirectory"],'velocity.eps')
    plt.savefig(plotName)
    plotName = os.path.join(params["outputDirectory"],'velocity.pdf')
    plt.savefig(plotName)
    plt.close()

    vmin = -8
    vmax = -4

    plt.figure()
    ax = plt.gca()
    sc = ax.scatter(events[:,12],events[:,1],c=np.log10(events[:,25]),vmin=vmin, vmax=vmax)
    ax.set_xscale('log')
    plt.xlabel("Distance [m]")
    plt.ylabel("Earthquake Magnitude")
    cbar = plt.colorbar(sc)
    cbar.set_label("log10(Ground velocity [m/s])")
    #plt.xlim([1e-6,1e-3])
    #plt.ylim([5,9])
    plt.show()
    plotName = os.path.join(params["outputDirectory"],'mag_distance.png')
    plt.savefig(plotName)
    plotName = os.path.join(params["outputDirectory"],'mag_distance.eps')
    plt.savefig(plotName)
    plotName = os.path.join(params["outputDirectory"],'mag_distance.pdf')
    plt.savefig(plotName)
    plt.close()

    plt.figure()
    ax = plt.gca()
    sc = ax.scatter(events[:,12],events[:,14],c=np.log10(events[:,25]),vmin=vmin, vmax=vmax)
    ax.set_xscale('log')
    plt.xlabel("Distance [m]")
    plt.ylabel("Azimuth [deg]")
    cbar = plt.colorbar(sc)
    cbar.set_label("log10(Ground velocity [m/s])")
    #plt.xlim([1e-6,1e-3])
    #plt.ylim([5,9])
    plt.show()
    plotName = os.path.join(params["outputDirectory"],'azimuth_distance.png')
    plt.savefig(plotName)
    plotName = os.path.join(params["outputDirectory"],'azimuth_distance.eps')
    plt.savefig(plotName)
    plotName = os.path.join(params["outputDirectory"],'azimuth_distance.pdf')
    plt.savefig(plotName)
    plt.close()

    plt.figure()
    ax = plt.gca()
    sc = ax.scatter(events[:,14],events[:,1],c=np.log10(events[:,25]),vmin=vmin, vmax=vmax)
    plt.xlabel("Azimuth [deg]")
    plt.ylabel("Earthquake Magnitude")
    cbar = plt.colorbar(sc)
    cbar.set_label("log10(Ground velocity [m/s])")
    #plt.xlim([1e-6,1e-3])
    #plt.ylim([5,9])
    plt.show()
    plotName = os.path.join(params["outputDirectory"],'mag_azimuth.png')
    plt.savefig(plotName)
    plotName = os.path.join(params["outputDirectory"],'mag_azimuth.eps')
    plt.savefig(plotName)
    plotName = os.path.join(params["outputDirectory"],'mag_azimuth.pdf')
    plt.savefig(plotName)
    plt.close()

    plt.figure()
    ax = plt.gca()
    indexes = np.where(events[:,31] == 0)[0]
    sc = ax.scatter(events[indexes,12],events[indexes,25],s=20,c='b')
    indexes = np.where(events[:,31] == 1)[0]
    sc = ax.scatter(events[indexes,12],events[indexes,25],s=20,c='g',marker='+')
    indexes = np.where(events[:,31] == 2)[0]
    sc = ax.scatter(events[indexes,12],events[indexes,25],s=20,c='r',marker='x')
    ax.set_xscale('log')
    ax.set_yscale('log')
    plt.xlim([1e5,1e8])
    plt.ylim([8e-7,1e-3])
    plt.xlabel("Distance [m]")
    plt.ylabel("Peak ground motion [m/s]")
    plt.grid()
    plt.show()
    plotName = os.path.join(params["outputDirectory"],'lockloss_vel_distance.png')
    plt.savefig(plotName)
    plotName = os.path.join(params["outputDirectory"],'lockloss_vel_distance.eps')
    plt.savefig(plotName)
    plotName = os.path.join(params["outputDirectory"],'lockloss_vel_distance.pdf')
    plt.savefig(plotName)
    plt.close()


# =============================================================================
#
#                                    MAIN
#
# =============================================================================

if __name__=="__main__":

    predict_events()

