# -*- coding: utf-8 -*-
"""
Created on Wed Apr 22 10:45:23 2020

@author: Haider
"""


from __future__ import print_function, division   # Ensures Python3 printing & division standard
from matplotlib import pyplot as plt
from matplotlib import colors
from matplotlib.colors import LogNorm
import numpy as np
import csv
import pandas as pd 
from pandas import Series, DataFrame 
import lightgbm as lgb
from sklearn.metrics import mean_squared_error
from sklearn.neural_network import MLPClassifier
import time
# Possible other packages to consider:
# cornerplot, seaplot, sklearn.decomposition(PCA)

r = np.random
r.seed(42)

SavePlots = False          # For now, don't save plots (once you trust your code, switch on)
plt.close('all')           # To close all open figures




# ----------------------------------------------------------------------------------- #
# Evaluate (made into a function, as this is called many times):
# ----------------------------------------------------------------------------------- #
def evaluate(bquark,isb) :
    N = [[0,0], [0,0]]   # Make a list of lists (i.e. matrix) for counting successes/failures.
    for i in np.arange(len(isb)):
        if (bquark[i] == 0 and isb[i] == 0) : N[0][0] += 1
        if (bquark[i] == 0 and isb[i] == 1) : N[0][1] += 1
        if (bquark[i] == 1 and isb[i] == 0) : N[1][0] += 1
        if (bquark[i] == 1 and isb[i] == 1) : N[1][1] += 1
    fracWrong = float(N[0][1]+N[1][0])/float(len(isb))
    return N, fracWrong




# ----------------------------------------------------------------------------------- #
# Main program start:
# ----------------------------------------------------------------------------------- #

# Get data:
# ----------------------------------------------------------------------------------- #
data = np.genfromtxt('AlephBtag_MC_small_v2.csv', names=True)

energy = data['energy']
cTheta = data['cTheta']
phi    = data['phi']
prob_b = data['prob_b']
spheri = data['spheri']
pt2rel = data['pt2rel']
multip = data['multip']
bqvjet = data['bqvjet']
ptlrel = data['ptlrel']
nnbjet = data['nnbjet']
isb    = data['isb']

dataset=np.vstack((energy,cTheta))
dataset=np.vstack((dataset,phi))
dataset=np.vstack((dataset,prob_b))
dataset=np.vstack((dataset,spheri))
dataset=np.vstack((dataset,pt2rel))
dataset=np.vstack((dataset,multip))
dataset=np.vstack((dataset,bqvjet))
dataset=np.vstack((dataset,ptlrel))
dataset=dataset.T

split=5000
datalen=len(data)
Isbtest=isb[datalen-split:]
isb=isb[:datalen-split]
datasettest=dataset[datalen-split:]
dataset=dataset[:datalen-split]

start=time.time()
lgb_train = lgb.Dataset(dataset, isb)
lgb_eval = lgb.Dataset(datasettest, Isbtest, reference=lgb_train)
params = {
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'num_leaves': 20,
}
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=1000,
                valid_sets=lgb_eval,
                early_stopping_rounds=50)

y_pred = gbm.predict(datasettest, num_iteration=gbm.best_iteration)
y_pred=[1 if pred > 0.5 else 0 for pred in y_pred]
# eval
print('The rmse of prediction is:', mean_squared_error(Isbtest, y_pred) ** 0.5)

[N,fracwrong]= evaluate(y_pred,Isbtest)
end=time.time()
print (N,fracwrong,end-start,'END gbdt')
start=time.time()
clf = MLPClassifier(max_iter=20000,n_iter_no_change=100,solver='adam',activation='logistic',hidden_layer_sizes=(10, 10), random_state=1)
clf.fit(dataset, isb)

y_predNN=clf.predict(datasettest)
[NNN,fracwrongNN]= evaluate(y_predNN,Isbtest)
end=time.time()
print (NNN,fracwrongNN,end-start,'END NN')