#!/usr/bin/env python
# ----------------------------------------------------------------------------------- #
#
#  This Python macro illustrates the use of ChiSquare as a goodness-of-fit measure,
#  how this distribution comes about, and that it actually works, here with three
#  different examples!
#
#  References:
#    Barlow: Chapter 6
#    Cowan: Chapter 2.7, Chapter 7
#    Bevington: Chapter 6
#
#  Author: Troels C. Petersen (NBI)
#  Email:  petersen@nbi.dk
#  Date:   12th of November 2017
#
# ----------------------------------------------------------------------------------- #

from __future__ import division, print_function

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from iminuit import Minuit
from probfit import Chi2Regression

plt.close('all')


# function to create a nice string output
def nice_string_output(names, values, extra_spacing=2):
    max_values = len(max(values, key=len))
    max_names = len(max(names, key=len))
    string = ""
    for name, value in zip(names, values):
        string += "{0:s} {1:>{spacing}} \n".format(name, value,
                          spacing=extra_spacing + max_values + max_names - len(name))
    return string[:-2]


# ----------------------------------------------------------------------------------- #
# Chi Squared Test
# ----------------------------------------------------------------------------------- #

SavePlots = True

r = np.random        # Random generator
r.seed(42)           # Set a random seed (but a fixed one - more on that later.)


# ------------------------------------------------------------------ #
# Generate and fit LINEAR data:
# ------------------------------------------------------------------ #

Nexp = 1000
NpointsLin = 17

alpha0 = 3.6
alpha1 = 0.3
sigmay = 0.4

array_Chi2_Lin = np.zeros(Nexp)
array_Prob_Lin = np.zeros(Nexp)

# Loop over number of experiments to generate data and subsequent Chi2 values:
for iexp in range(Nexp):

    # Generate points:
    xLin = np.arange(NpointsLin)+1
    exLin = np.zeros_like(xLin)
    yLin = alpha0 + alpha1*xLin + r.normal(0, sigmay, NpointsLin)
    eyLin = sigmay*np.ones_like(xLin)

    def fit_function_Lin(x, alpha0, alpha1):
        return alpha0 + alpha1*x

    chi2_object = Chi2Regression(fit_function_Lin, xLin, yLin, eyLin)
    minuitLin = Minuit(chi2_object, pedantic=False, alpha0=1, alpha1=1, print_level=0)
    minuitLin.migrad()                            # perform the actual fit

    Chi2Lin = minuitLin.fval                      # the chi2 value
    NvarLin = 2                                   # Number of variables (alpha0 and alpha1)
    NdofLin = NpointsLin - NvarLin                # Number of degrees of freedom

    ProbLin = stats.chi2.sf(Chi2Lin, NdofLin)     # The chi2 probability given N_DOF degrees of freedom

    array_Chi2_Lin[iexp] = Chi2Lin
    array_Prob_Lin[iexp] = ProbLin


fig, ax = plt.subplots(ncols=3, figsize=(16, 6))

ax[0].errorbar(xLin, yLin, eyLin, fmt='ro', ecolor='k', elinewidth=1, capsize=2, capthick=1)
ax[0].plot(xLin, fit_function_Lin(xLin, *minuitLin.args), '-r')

names = ['Chi2/ndf', 'Prob', 'p0', 'p1']
values = ["{:.3f} / {:d}".format(Chi2Lin, NdofLin),
          "{:.3f}".format(ProbLin),
          "{:.3f} +/- {:.3f}".format(minuitLin.values['alpha0'], minuitLin.errors['alpha0']),
          "{:.3f} +/- {:.3f}".format(minuitLin.values['alpha1'], minuitLin.errors['alpha1']),
          ]
ax[0].text(0.05, 0.95, nice_string_output(names, values), family='monospace',
           transform=ax[0].transAxes, fontsize=10, verticalalignment='top')
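# ------------------------------------------------------------------ #
# A minimal illustrative cross-check (a sketch, assuming the same iminuit/probfit
# interface as used above): the Chi2 that Minuit minimises is just the sum of
# squared pulls,
#   Chi2 = sum_i ( (y_i - f(x_i)) / sigma_i )^2,
# so for the last generated linear dataset it can be recomputed by hand and
# compared to the value returned by minuitLin.fval.
# ------------------------------------------------------------------ #
residualLin = yLin - fit_function_Lin(xLin, minuitLin.values['alpha0'], minuitLin.values['alpha1'])
Chi2Lin_byhand = np.sum((residualLin / eyLin)**2)
print("Linear fit: Chi2 from Minuit = {:.3f},  Chi2 by hand = {:.3f}".format(Chi2Lin, Chi2Lin_byhand))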
# ------------------------------------------------------------------ #
# Generate and fit OSCILLATING data:
# ------------------------------------------------------------------ #

NpointsOsc = 19

mean  = 1.6
amp   = 3.3
omega = 0.7
phase = 0.3
sigmay = 0.5

array_Chi2_Osc = np.zeros(Nexp)
array_Prob_Osc = np.zeros(Nexp)

# Loop over number of experiments to generate data and subsequent Chi2 values:
for iexp in range(Nexp):

    # Generate points:
    xOsc = np.arange(NpointsOsc)+1
    exOsc = np.zeros_like(xOsc)
    yOsc = mean + amp*np.cos(omega*xOsc + phase) + r.normal(0, sigmay, NpointsOsc)
    eyOsc = sigmay*np.ones_like(xOsc)

    # Fit points:
    def fit_function_Osc(x, mean, amp, omega, phase):
        return mean + amp*np.cos(omega*x + phase)

    chi2_object = Chi2Regression(fit_function_Osc, xOsc, yOsc, eyOsc)
    minuitOsc = Minuit(chi2_object, pedantic=False, mean=mean, amp=amp, omega=omega, phase=phase, print_level=0)
    # minuitOsc = Minuit(chi2_object, pedantic=False, mean=0.0, amp=0.0, omega=0.0, phase=0.0, print_level=0)
    minuitOsc.migrad()                            # Perform the actual fit

    Chi2Osc = minuitOsc.fval                      # Get the chi2 value
    NvarOsc = 4                                   # Number of variables (mean, amp, omega and phase)
    NdofOsc = NpointsOsc - NvarOsc                # Number of degrees of freedom

    ProbOsc = stats.chi2.sf(Chi2Osc, NdofOsc)     # The chi2 probability given Ndof degrees of freedom

    array_Chi2_Osc[iexp] = Chi2Osc
    array_Prob_Osc[iexp] = ProbOsc


ax[1].errorbar(xOsc, yOsc, eyOsc, fmt='ro', ecolor='k', elinewidth=1, capsize=2, capthick=1)
x_axis = np.linspace(1, NpointsOsc, 1000)
ax[1].plot(x_axis, fit_function_Osc(x_axis, *minuitOsc.args), '-r')
ax[1].set_ylim(-3.5, 8.5)

# If you want to show uncertainties in y:
#   y, bin_edges = np.histogram(x_all, bins=Nbins, range=(xmin, xmax), normed=False)
#   x = 0.5*(bin_edges[1:] + bin_edges[:-1])
#   sy = np.sqrt(y)

names = ['Chi2/ndf', 'Prob', 'mean', 'amp', 'omega', 'phase']
values = ["{:.3f} / {:d}".format(Chi2Osc, NdofOsc),
          "{:.3f}".format(ProbOsc),
          "{:.3f} +/- {:.3f}".format(minuitOsc.values['mean'], minuitOsc.errors['mean']),
          "{:.3f} +/- {:.3f}".format(minuitOsc.values['amp'], minuitOsc.errors['amp']),
          "{:.3f} +/- {:.3f}".format(minuitOsc.values['omega'], minuitOsc.errors['omega']),
          "{:.3f} +/- {:.3f}".format(minuitOsc.values['phase'], minuitOsc.errors['phase']),
          ]
ax[1].text(0.05, 0.95, nice_string_output(names, values), family='monospace',
           transform=ax[1].transAxes, fontsize=10, verticalalignment='top')
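# ------------------------------------------------------------------ #
# Illustrative sketch (assuming the same iminuit/probfit interface as above):
# a non-linear model like this oscillation is sensitive to its starting values.
# Refitting the last generated oscillating dataset with the poor starting values
# from the commented-out "minuitOsc" line typically ends up in a local minimum
# with a far worse Chi2 probability (cf. question 3 at the bottom of this file).
# ------------------------------------------------------------------ #
chi2_object_poor = Chi2Regression(fit_function_Osc, xOsc, yOsc, eyOsc)
minuitOsc_poor = Minuit(chi2_object_poor, pedantic=False, mean=0.0, amp=0.0, omega=0.0, phase=0.0, print_level=0)
minuitOsc_poor.migrad()        # Same fit, but started far from the true parameter values
print("Oscillation fit:  good start: Chi2 = {:8.1f} (Prob = {:.3f})".format(Chi2Osc, ProbOsc))
print("                  poor start: Chi2 = {:8.1f} (Prob = {:.3f})".format(minuitOsc_poor.fval,
      stats.chi2.sf(minuitOsc_poor.fval, NdofOsc)))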
# ------------------------------------------------------------------ #
# Generate and fit EXPONENTIAL binned data:
# ------------------------------------------------------------------ #

NpointsExp = 1000    # Put this number of points (exponentially distributed) in a histogram.
NbinsExp = 17
tau = 3.1

array_Chi2_Exp = np.zeros(Nexp)
array_Prob_Exp = np.zeros(Nexp)

def fit_function_Exp(x, tau):
    return 1.0 / tau * np.exp(-x / tau)

def fit_function_Exp_Ext(x, tau, N):
    return N * fit_function_Exp(x, tau)

# Loop over number of experiments to generate data and subsequent Chi2 values:
for iexp in range(Nexp):

    dataExp = r.exponential(tau, NpointsExp)
    yExp, xExp_edges = np.histogram(dataExp, bins=NbinsExp, range=(0, NbinsExp))
    xExp = (xExp_edges[1:] + xExp_edges[:-1])/2
    syExp = np.sqrt(yExp)

    Chi2_object = Chi2Regression(fit_function_Exp_Ext, xExp[yExp>0], yExp[yExp>0], syExp[yExp>0])
    minuitExp = Minuit(Chi2_object, pedantic=False, tau=tau, N=NpointsExp, print_level=0)
    minuitExp.migrad()                            # perform the actual fit

    Chi2Exp = minuitExp.fval
    NvarExp = 2                                   # Number of variables (tau and N)
    NdofExp = NbinsExp - NvarExp                  # Number of degrees of freedom

    ProbExp = stats.chi2.sf(Chi2Exp, NdofExp)     # The chi2 probability given N_DOF degrees of freedom

    array_Chi2_Exp[iexp] = Chi2Exp
    array_Prob_Exp[iexp] = ProbExp


ax[2].hist(dataExp, NbinsExp, range=(0, NbinsExp), histtype='step')
ax[2].set_xlim(0, NbinsExp)
x_axis = np.linspace(0, NbinsExp, 1000)
ax[2].plot(x_axis, fit_function_Exp_Ext(x_axis, *minuitExp.args), '-r')

names = ['Entries', 'Mean', 'STD', 'Chi2/ndf', 'Prob', 'N', 'tau']
values = ["{:d}".format(len(dataExp)),
          "{:.3f}".format(dataExp.mean()),
          "{:.3f}".format(dataExp.std(ddof=1)),
          "{:.3f} / {:d}".format(Chi2Exp, NdofExp),
          "{:.3f}".format(ProbExp),
          "{:.3f} +/- {:.3f}".format(minuitExp.values['N'], minuitExp.errors['N']),
          "{:.3f} +/- {:.3f}".format(minuitExp.values['tau'], minuitExp.errors['tau']),
          ]
ax[2].text(0.3, 0.95, nice_string_output(names, values), family='monospace',
           transform=ax[2].transAxes, fontsize=10, verticalalignment='top')

plt.show(block=False)

if (SavePlots):
    fig.savefig("Chi2Dist_SeveralExamples.pdf")


# ------------------------------------------------------------------ #
# Plot the Chi2 distributions from all the experiments:
# ------------------------------------------------------------------ #

# If there has been more than one experiment, then make another white canvas:
if (Nexp > 1):

    xaxis = np.linspace(0, 50, 1000)
    yaxis = stats.chi2.pdf(xaxis, 15)

    array_Chi2 = [array_Chi2_Lin, array_Chi2_Osc, array_Chi2_Exp]

    fig2, ax2 = plt.subplots(ncols=3, figsize=(16, 6))
    for i in range(3):
        ax2[i].hist(array_Chi2[i], 50, range=(0, 50), histtype='step', normed=True)
        ax2[i].plot(xaxis, yaxis)

        string  = "Entries {:>7d}\n".format(len(array_Chi2[i]))
        string += "Mean {:>10.3f}\n".format(array_Chi2[i].mean())
        string += "STD {:>11.4}".format(array_Chi2[i].std(ddof=1))
        ax2[i].text(0.6, 0.95, string, family='monospace',
                    transform=ax2[i].transAxes, fontsize=10, verticalalignment='top')

    plt.show(block=False)
    if (SavePlots):
        fig2.savefig("Chi2Dist_SeveralCases.pdf")


# Finally, ensure that the program does not terminate (and the plot disappears), before you press enter:
try:
    __IPYTHON__
except NameError:
    try:
        raw_input('Press Enter to exit')    # Python 2
    except NameError:
        input('Press Enter to exit')        # Python 3


# ----------------------------------------------------------------------------------- #
#
# Make sure you've read the relevant references and that you understand not only what
# the ChiSquare is, but also that it follows the ChiSquare distribution, and that the
# probability of obtaining such a ChiSquare or worse can be calculated from it.
#
# The program generates a certain number of datasets in three different ways, from
# which the Chi2 of the fit is recorded.
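# ------------------------------------------------------------------ #
# Illustrative check (a minimal sketch, using only scipy as imported above):
# if the recorded Chi2 values really follow a Chi2 distribution with Ndof = 15,
# then the fit probabilities stored above should be uniformly distributed
# between 0 and 1. A Kolmogorov-Smirnov test against a uniform distribution
# gives a quick, quantitative version of that statement.
# ------------------------------------------------------------------ #
for label, probs in zip(['Linear', 'Oscillating', 'Exponential'],
                        [array_Prob_Lin, array_Prob_Osc, array_Prob_Exp]):
    ks_stat, ks_pval = stats.kstest(probs, 'uniform')
    print("{:12s}: KS-test of fit probabilities against a uniform distribution gives p = {:.3f}".format(label, ks_pval))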
#
#
# Questions:
# ----------
#  1) Why have I chosen the three examples to have 17 points, 19 points and 17 bins?
#     Hint: What is the number of degrees of freedom in each of the three cases?
#
#  2) In the example of the linear fit, how many points lie outside +-1 sigma?
#     Is that a reasonable number? (A small counting sketch is given at the very
#     end of this file.)
#
#  3) In the oscillatory case, try to drop the good starting values given to the fit
#     (i.e. use the commented-out line defining "minuitOsc" instead of the one above
#     it), and see how well the fit goes when it does not have good starting values.
#
#  4) In the exponential fit, where do the uncertainties come from? And is the fit
#     reasonable?
#
#
# Advanced questions:
# -------------------
#  1) Why does the last of the three Chi2 distributions not fit quite as well as
#     the other two? Try to change the number of generated points to 100 instead
#     of 1000, and/or change the lifetime to tau=2.1. Does this increase the
#     mismatch of the Chi2 distribution? Does that give you a hint why?
#
# ----------------------------------------------------------------------------------- #
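# ------------------------------------------------------------------ #
# Small counting sketch for question 2 (illustrative only, using the last
# generated linear dataset still in memory): count how many of the 17 points
# lie more than one standard deviation from the fitted line. For Gaussian
# uncertainties roughly 32% of the points (about 5-6 out of 17) are expected
# to do so.
# ------------------------------------------------------------------ #
pullsLin = (yLin - fit_function_Lin(xLin, *minuitLin.args)) / eyLin
print("Linear fit: {:d} of {:d} points lie outside +-1 sigma".format(int(np.sum(np.abs(pullsLin) > 1.0)), NpointsLin))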