#!/usr/bin/env python
# ----------------------------------------------------------------------------------- //
#
#  Python macro for introducing plotting and fitting with examples.
#
#  Author:  Troels C. Petersen (NBI)
#           Christian Michelsen (NBI, for Python plots)
#  Date:    17th of November 2017 (latest update)
#
#  Run by:  ./IntroToPlottingAndFitting.py
#
# ----------------------------------------------------------------------------------- //

# First, import the modules you want to use:
from __future__ import print_function, division
import numpy as np                                       # Matlab-like syntax for linear algebra and functions
import matplotlib.pyplot as plt                          # Plots and figures like you know them from Matlab
from iminuit import Minuit                               # The actual fitting tool, better than scipy's
from probfit import BinnedLH, Chi2Regression, Extended   # Helper tools for fitting

plt.close('all')             # Close all open figures

r = np.random                # Random generator
r.seed(42)                   # Set a random seed (but a fixed one)

SavePlots = True             # Save the plots produced (set to False while still testing the code)
verbose = True               # For now, print a lot of output (once you trust your code, switch off)
Nverbose = 10                # But only print a lot for the first 10 random numbers


# =============================================================================
# Initial functions
# =============================================================================

# Function to create a nicely formatted string output for figures:
def nice_string_output(names, values, extra_spacing=2):
    max_values = len(max(values, key=len))
    max_names = len(max(names, key=len))
    string = ""
    for name, value in zip(names, values):
        string += "{0:s} {1:>{spacing}} \n".format(name, value,
                  spacing=extra_spacing + max_values + max_names - len(name))
    return string[:-2]


#----------------------------------------------------------------------------------
# Create the data:
#----------------------------------------------------------------------------------

Npoints = 10000              # Number of random points produced

# Create empty arrays to store the numbers in.
# Notice that we create an empty array first instead of appending to lists, for computational speed:
x_all = np.zeros(Npoints)
y_all = np.zeros(Npoints)

# Loop to get some random values and fill them into the arrays:
for iexp in range(Npoints):
    x_all[iexp] = r.normal() * 0.8 - 0.5
    y_all[iexp] = r.normal() * 1.3 + 0.5
    if (verbose and iexp < Nverbose):
        print("  Histogram input value {0:2d}:   x = {1:6.3f}    y = {2:6.3f}".format(iexp, x_all[iexp], y_all[iexp]))

# Numpy-version (note that this overwrites the arrays filled above with new random numbers):
x_all = r.normal(-0.5, 0.8, Npoints)
y_all = r.normal(0.5, 1.3, Npoints)
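
# -----------------------------------------------------------------------------------
# ADDED SKETCH (not part of the original macro): a quick sanity check of the generated
# data. The sample mean and standard deviation should come out close to the (mu, sigma)
# used above, i.e. roughly (-0.5, 0.8) for x_all and (0.5, 1.3) for y_all.
if verbose:
    print("  Sanity check:  x_all has mean = {0:6.3f}, std = {1:6.3f}".format(x_all.mean(), x_all.std(ddof=1)))
    print("  Sanity check:  y_all has mean = {0:6.3f}, std = {1:6.3f}".format(y_all.mean(), y_all.std(ddof=1)))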

#----------------------------------------------------------------------------------
# Histogram of data (1D):
#----------------------------------------------------------------------------------

# In the following, we want to produce two histograms (here both Gaussian), and plot
# them together in a single plot. This can be done in TWO WAYS!
# The first one is very MatLab-like, and probably also the simplest, but in the longer
# run it has a few drawbacks, while the second way is more Pythonic (and Object Oriented),
# and allows for returning to these plots later in the code.

# General input (for both methods):
Nbins = 100
xmin = -5
xmax = 5

# Method 0 (MatLab-like):
# Create just a single figure and axes:
plt.figure(figsize=(12, 6))      # figsize is in inches

# Create a histogram with Nbins bins in the range from xmin to xmax.
# Choose a step-type histogram with a line width of 2 in the color and label chosen:
plt.hist(x_all, bins=Nbins, range=(xmin, xmax), histtype='step', linewidth=2, label=r'Gaussian ($\mu$ = -0.5)', color='blue')
plt.hist(y_all, bins=Nbins, range=(xmin, xmax), histtype='step', linewidth=2, label=r'Gaussian ($\mu$ = +0.5)', color='red')
plt.xlabel("Random numbers")                     # The label of the x axis
plt.ylabel("Frequency")                          # The label of the y axis
plt.title("Distribution of Gaussian numbers")    # The title of the plot
plt.legend(loc='best')                           # Could also be e.g. loc='upper right'

# Method 1 (Python-like):
# Create just a single figure and axes:
fig, ax = plt.subplots(figsize=(12, 6))          # figsize is in inches

# Create a histogram and give it a name.
# It has Nbins bins in the range from xmin to xmax.
# Choose a step-type histogram with a line width of 2 in the color and label chosen:
hist1 = ax.hist(x_all, bins=Nbins, range=(xmin, xmax), histtype='step', linewidth=2, label=r'Gaussian ($\mu$ = -0.5)', color='blue')
hist2 = ax.hist(y_all, bins=Nbins, range=(xmin, xmax), histtype='step', linewidth=2, label=r'Gaussian ($\mu$ = +0.5)', color='red')
ax.set_xlabel("Random numbers")                  # The label of the x axis
ax.set_ylabel("Frequency")                       # The label of the y axis
ax.set_title("Distribution of Gaussian numbers") # The title of the plot
ax.legend(loc='best')                            # Could also be e.g. loc='upper right'


#----------------------------------------------------------------------------------
# Fit to the data / histogram (1D):
#----------------------------------------------------------------------------------

# Define your PDF / model:
def gauss_pdf(x, mu, sigma):
    """Normalized Gaussian"""
    return 1 / np.sqrt(2 * np.pi) / sigma * np.exp(-(x - mu)**2 / 2. / sigma**2)

def gauss_extended(x, N, mu, sigma):
    """Non-normalized (extended) Gaussian"""
    return N * gauss_pdf(x, mu, sigma)
# Could also be written as:
# gauss_extended = Extended(gauss_pdf)

# Make a Binned Likelihood object based on the fit function gauss_extended and
# the original data x_all. Use the same bins and bounds as the original histogram.
# extended=True:  the number of events N is a fit variable as well
# extended=False: the number of events N is fixed
binned_likelihood = BinnedLH(gauss_extended, x_all, bins=Nbins, bound=(xmin, xmax), extended=True)

# Be ready to fit the LH object with the given starting parameters. Other options are:
#   x=2               set initial value of x to 2
#   limit_x=(-1, 1)   set the allowed range for x
#   y=2, fix_y=True   fix the value of y to 2
minuit = Minuit(binned_likelihood, pedantic=False, mu=-1, sigma=1, N=1000)

minuit.migrad()      # Perform the actual fit
# minuit_output = [minuit.get_fmin(), minuit.get_param_states()]   # Save the output parameters in case needed

fit_N, fit_mu, fit_sigma = minuit.args     # The fitted values of the parameters
for name in minuit.parameters:
    print("Fit value: {0} = {1:.5f} +/- {2:.5f}".format(name, minuit.values[name], minuit.errors[name]))

LLH_value = minuit.fval                    # The (log-)likelihood value at the minimum

binwidth = (xmax - xmin) / Nbins           # The scale factor between histogram and fit, taking e.g. the bin width into account

x_fit = np.linspace(-5, 5, 1000)           # Create the x-axis for the plot of the fitted function
y_fit = binwidth * gauss_extended(x_fit, fit_N, fit_mu, fit_sigma)   # The fitted function

ax.plot(x_fit, y_fit, '-', color='blue', label='Fit with Gaussian to x')
ax.legend(loc='upper right')
# Note, we refer to the old plot "ax". Had we done another plot in between,
# we would not have been able to plot on top of the old figure with the MatLab syntax.
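
# -----------------------------------------------------------------------------------
# ADDED SKETCH (not part of the original macro): as a cross check of the binned
# likelihood fit above, the same data can be fitted with an UNBINNED likelihood, which
# uses every single value in x_all instead of the bin contents. This assumes probfit's
# UnbinnedLH class and the same (old) iminuit interface used above.
from probfit import UnbinnedLH

unbinned_likelihood = UnbinnedLH(gauss_pdf, x_all)    # gauss_pdf is normalized, so no N here
minuit_unbinned = Minuit(unbinned_likelihood, pedantic=False, mu=-1, sigma=1)
minuit_unbinned.migrad()
if verbose:
    for name in minuit_unbinned.parameters:
        print("Unbinned fit value: {0} = {1:.5f} +/- {2:.5f}".format(
            name, minuit_unbinned.values[name], minuit_unbinned.errors[name]))
# The fitted mu and sigma should agree with the binned fit above within the uncertainties.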

## Note, had we wanted to plot either of the histograms with error bars:
# y, bin_edges = np.histogram(x_all, bins=Nbins, range=(xmin, xmax))
# x = 0.5*(bin_edges[1:] + bin_edges[:-1])
# sy = np.sqrt(y)
# hist1 = ax.errorbar(x, y, sy, fmt='.', label='Prime number distribution')


#----------------------------------------------------------------------------------
# Get statistics:
#----------------------------------------------------------------------------------

# In this section we calculate the needed statistical values for the fit.
# First we extract the x- and y-values of the histogram and assume
# Poisson statistics to calculate the uncertainties:
entries, bin_edges, _ = hist1
bin_centers = 0.5*(bin_edges[1:] + bin_edges[:-1])

hist1_x, hist1_y = bin_centers, entries
hist1_sy = np.sqrt(hist1_y)

# Here we calculate the chi2 value of the fit and the number of non-empty bins:
chi2_val = 0
N_NotEmptyBin = 0

for x, y, sy in zip(hist1_x, hist1_y, hist1_sy):
    if y > 0:
        f = binwidth * gauss_extended(x, fit_N, fit_mu, fit_sigma)   # Calculate the model value
        residual = (y - f) / sy      # Find the uncertainty-weighted residual
        chi2_val += residual**2      # The chi2-value is the sum of squared residuals
        N_NotEmptyBin += 1           # Count the bin as non-empty, since sy > 0 (and thus y > 0)

# Here we find the number of degrees of freedom, which is the number of
# data points in the fit (non-empty bins) minus the number of fit parameters:
N_DOF = N_NotEmptyBin - len(minuit.args)

from scipy import stats
chi2_prob = stats.chi2.sf(chi2_val, N_DOF)    # The chi2 probability given N_DOF degrees of freedom

names = ['Entries', 'Mean', 'STD Dev.', 'Chi2/ndf', 'Prob']
values = ["{:d}".format(len(x_all)),
          "{:.3f}".format(x_all.mean()),
          "{:.3f}".format(x_all.std(ddof=1)),
          "{:.3f} / {:d}".format(chi2_val, N_DOF),
          "{:.3f}".format(chi2_prob),
          ]

ax.text(0.02, 0.95, nice_string_output(names, values), family='monospace',
        transform=ax.transAxes, fontsize=12, verticalalignment='top')
# Note, we refer to the old plot "ax". Had we done another plot in between,
# we would not have been able to plot on top of the old figure with the MatLab syntax.

plt.tight_layout()        # Remove outer whitespace
plt.show(block=False)     # In a terminal, continue the code after showing the figure

if SavePlots:
    fig.savefig('Histogram.pdf', dpi=600)
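
# -----------------------------------------------------------------------------------
# ADDED SKETCH (not part of the original macro): the chi2 computed in the loop above can
# also be obtained in a single vectorized expression, masking out the empty bins.
# This is purely a cross check of the loop-based calculation.
mask = hist1_y > 0
f_model = binwidth * gauss_extended(hist1_x[mask], fit_N, fit_mu, fit_sigma)
chi2_val_numpy = np.sum(((hist1_y[mask] - f_model) / hist1_sy[mask])**2)
if verbose:
    print("Chi2 cross check:  loop = {0:.3f}    numpy = {1:.3f}".format(chi2_val, chi2_val_numpy))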

#----------------------------------------------------------------------------------
# Graph with fit (1D):
#----------------------------------------------------------------------------------

# Define points for a graph with errors:
Ndata = 51

# Create empty arrays to store the numbers in:
x = np.zeros(Ndata)
y = np.zeros(Ndata)
sx = np.zeros(Ndata)
sy = np.zeros(Ndata)

# Having defined four arrays filled with zeros, below we fill them with values:
for i in range(Ndata):
    x[i] = -1.25 + 0.05*i
    y[i] = -2.0 - 2.0*x[i] + 1.0*x[i]*np.sin(x[i]) + 2.0*x[i]**3
    if (x[i] > 0.0):
        y[i] += 0.5                # Put in a discontinuity by hand!
    y[i] += r.normal(0., 0.1)      # Adding a Gaussian variation of 0.1
    sx[i] = 0.0                    # Deciding that x is precise!
    sy[i] = 0.1                    # Writing down the error
    if (verbose and i < Nverbose):
        print("  Graph point {0:2d}:   x = {1:6.3f} +- {2:5.3f}    y = {3:6.3f} +- {4:5.3f}".format(i, x[i], sx[i], y[i], sy[i]))

# Numpy-version:
x = np.linspace(-1.25, -1.25 + 0.05*(Ndata-1), Ndata)
y = -2.0 - 2.0*x + 1.0*x*np.sin(x) + 2.0*x**3
y[x > 0.0] += 0.5
y += r.normal(0., 0.1, Ndata)
sx = np.zeros(Ndata)
sy = 0.1*np.ones(Ndata)

# Plot the data.
# Notice that we give the plot objects new names, so we can differentiate between the old and the new plots:
fig2, ax2 = plt.subplots(figsize=(12, 6))
ax2.errorbar(x, y, xerr=sx, yerr=sy, fmt='.')
ax2.set_title("Fit of a graph")
ax2.set_xlabel("X")
ax2.set_ylabel("Y")
ax2.set_xlim(-1.3, 1.3)

# Defining an external function (see below for a more advanced function):
def func_simple(x, p0, p1, p2, p3):
    return p0 + p1*x + p2*x*np.sin(x) + p3*x*x*x

chi2_object_simple = Chi2Regression(func_simple, x, y, sy)
minuit_simple = Minuit(chi2_object_simple, pedantic=False, p0=0., p1=2.0, p2=0.0, p3=2.0)
minuit_simple.migrad()     # Fit
p0, p1, p2, p3 = minuit_simple.args

print("Simple fit")
for name in minuit_simple.parameters:
    print("Fit value: {0} = {1:.5f} +/- {2:.5f}".format(name, minuit_simple.values[name], minuit_simple.errors[name]))

x_fit = np.linspace(-1.3, 1.3, 1000)
y_fit_simple = func_simple(x_fit, p0, p1, p2, p3)
ax2.plot(x_fit, y_fit_simple, '-')

# Defining an external function (which can include "if" statements and everything else!):
def func_advanced(x, p0, p1, p2, p3, p4):
    if x > 0:
        return p0 + p1*x + p2*x*np.sin(x) + p3*x**3 + p4
    else:
        return p0 + p1*x + p2*x*np.sin(x) + p3*x**3

# np.vectorize transforms a non-vectorized function into a vectorized one,
# such that func_advanced_vec allows x to be a numpy array instead of just a scalar:
func_advanced_vec = np.vectorize(func_advanced)

chi2_object_advanced = Chi2Regression(func_advanced, x, y, sy)
minuit_advanced = Minuit(chi2_object_advanced, pedantic=False, p0=0., p1=2.0, p2=0.0, p3=2.0, p4=0.5)
minuit_advanced.migrad()   # Fit
p0, p1, p2, p3, p4 = minuit_advanced.args

print("Advanced fit")
for name in minuit_advanced.parameters:
    print("Fit value: {0} = {1:.5f} +/- {2:.5f}".format(name, minuit_advanced.values[name], minuit_advanced.errors[name]))

x_fit = np.linspace(-1.3, 1.3, 1000)
y_fit_advanced = func_advanced_vec(x_fit, p0, p1, p2, p3, p4)
ax2.plot(x_fit, y_fit_advanced, '-')

# If you have many parameters and don't want to write out all the names,
# you can write it the following way, using argument unpacking (the asterisk *):
#   p = minuit_advanced.args                        # Save all the parameters as the tuple p
#   y_fit_advanced = func_advanced_vec(x_fit, *p)   # Unpack p as the input arguments

chi2_val = 0
for x_i, y_i, sy_i in zip(x, y, sy):
    f = func_advanced(x_i, p0, p1, p2, p3, p4)
    residual = (y_i - f) / sy_i
    chi2_val += residual**2

DOF = Ndata - len(minuit_advanced.args)

from scipy import stats
chi2_prob = stats.chi2.sf(chi2_val, DOF)
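
# -----------------------------------------------------------------------------------
# ADDED SKETCH (not part of the original macro): for comparison, the chi2 of the simple
# fit (without the discontinuity parameter p4) can be computed in the same way. Since
# the data contains a step at x = 0, the simple fit should give a noticeably worse chi2.
chi2_val_simple = np.sum(((y - func_simple(x, *minuit_simple.args)) / sy)**2)
DOF_simple = Ndata - len(minuit_simple.args)
if verbose:
    print("Chi2 comparison:  simple fit = {0:.1f} / {1:d}    advanced fit = {2:.1f} / {3:d}".format(
        chi2_val_simple, DOF_simple, chi2_val, DOF))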

names = ['Entries', 'Chi2/ndf', 'Prob', 'p0', 'p1', 'p2', 'p3', 'p4']
values = ["{:d}".format(Ndata),
          "{:.3f} / {:d}".format(chi2_val, DOF),
          "{:.3f}".format(chi2_prob),
          "{:.3f} +/- {:.3f}".format(minuit_advanced.values['p0'], minuit_advanced.errors['p0']),
          "{:.3f} +/- {:.3f}".format(minuit_advanced.values['p1'], minuit_advanced.errors['p1']),
          "{:.3f} +/- {:.3f}".format(minuit_advanced.values['p2'], minuit_advanced.errors['p2']),
          "{:.3f} +/- {:.3f}".format(minuit_advanced.values['p3'], minuit_advanced.errors['p3']),
          "{:.3f} +/- {:.3f}".format(minuit_advanced.values['p4'], minuit_advanced.errors['p4']),
          ]

# Place a text box in the upper left corner, in axes coordinates:
ax2.text(0.02, 0.95, nice_string_output(names, values), family='monospace',
         transform=ax2.transAxes, fontsize=12, verticalalignment='top')

plt.tight_layout()
plt.show(block=False)

if SavePlots:
    fig2.savefig('Graph.pdf', dpi=600)

# If you want to execute your scripts both in Spyder and normally through the terminal,
# you can use the snippet below to pause scripts running in the terminal,
# so your figures don't immediately close.
# If you are only going to use Spyder, just remove or ignore the part below.
try:
    __IPYTHON__
except NameError:
    try:
        raw_input('Press Enter to exit')   # Python 2
    except NameError:
        input('Press Enter to exit')       # Python 3