#!/usr/bin/env python # ---------------------------------------------------------------------------------- # # "Fitting is an art!" # ---------------------------------------------------------------------------------- # # # Python macro for testing likelihood fit vs. ChiSquare fits, and get a feel for # which fitting proceedure is likely to give the "best" results. # # This program is CENTRAL in that it contains all the necessary code for fitting. # The case has been chosen carefully, and should illustrate many points. Please, # "play around" with it as much as you can. # # IMPORTANT NOTE: We have calculated the Chi2 value (and associated probability) also # for the likelihood cases. But this is only for illustration, and not to be taken # literally, as the Chi2 value does not "make sense" for these, which minimize with # respect to the (-2*log) likelihood. # # The case in question is: # * (possibly) Gaussian peak distribution on constant background (peak searching/fitting) # # Author: Troels C. Petersen (NBI) # Email: petersen@nbi.dk # Date: 1st of December 2017 # # ---------------------------------------------------------------------------------- # from __future__ import division, print_function import numpy as np import matplotlib.pyplot as plt from iminuit import Minuit from probfit import Chi2Regression, BinnedLH, UnbinnedLH from scipy import stats plt.close('all') # Ignore LogWarning # (Python may complain, if it takes the log of too small numbers, e.g. log-likelihood) import warnings warnings.filterwarnings("ignore") r = np.random # Random generator r.seed(42) # Set a random seed (but a fixed one) Verbose = False Saveplots = True # These are switches to decide, if signal should be included in the data, and also fitted for: IncludeSignalInData = False IncludeSignalInFit = False # Assert that we do not try to fit any signal that isn't there! # However, in reality this is often what we actually do, to assert what the largest # deviation from background is. 
# But this type of "bump hunting" is challenging, and left for later in the
# course/statistics education.
assert not ((IncludeSignalInData == False) and (IncludeSignalInFit == True))


# Function to create a nice string output:
def nice_string_output(names, values, extra_spacing = 2):
    """Format name/value pairs into aligned lines, values right-justified."""
    widest_name = len(max(names, key=len))
    widest_value = len(max(values, key=len))
    rows = []
    for label, text in zip(names, values):
        width = extra_spacing + widest_value + widest_name - len(label)
        rows.append("{0:s} {1:>{spacing}} \n".format(label, text, spacing = width))
    # Drop the trailing " \n" of the last row:
    return "".join(rows)[:-2]


# Function to calculate the ChiSquare value (also used for the LLH fits, where we
# would like the Chi2 purely for illustration):
def calc_chi2(x, y, sy, function, func_args, scale = 1):
    """Return (chi2, Ndof) of scale*function(x, *func_args) against the points
    (x, y +- sy), using only the points with y > 0."""
    filled = y > 0
    ndof = filled.sum() - len(func_args)
    model = scale * function(x[filled], *func_args)
    chi2 = np.sum((y[filled] - model)**2 / sy[filled]**2)
    return chi2, ndof


# ----------------------------------------------------------------------------------- #
# CASE: Gaussian distribution on an exponential background:
#   sig(x) = Nsig / sqrt(2pi) / sigma * exp(-0.5 * sqr((x-mu)/sigma))  for x in [0,200]
#   bkg(x) = Nbkg * c * exp(-c * x)                                    for x in [0,200]
# ----------------------------------------------------------------------------------- #

# Produce signal (Gaussian), if requested:
if IncludeSignalInData:
    Nsigx = 100
    mux = 125.50
    sigmax = 3.75
    x_signal = r.normal(mux, sigmax, Nsigx)
    if Verbose:
        print(" Signal: ", x_signal)
else:
    x_signal = []

# Produce background (exponential):
Nbkgx = 1000
taux = 80.0                       # "Lifetime" of the background
x_background = r.exponential(taux, Nbkgx)
if Verbose:
    print(" Background: ", x_background)

# Histogram specifications:
xmin = 0.0
xmax = 200.0
Nbins = 200
binwidth = (xmax - xmin) / Nbins

# Note: x_signal and x_background need to be wrapped in a tuple inside the call,
# because np.concatenate takes several other (optional) arguments.
x_data = np.concatenate((x_signal, x_background))     # Combine the two numpy arrays
if Verbose:
    print(" Data: ", x_data)

# ----------------------------------------------------------------------------------- #
# Define PDFs:
# ----------------------------------------------------------------------------------- #

# Normalized Gaussian:
def gauss_pdf(x, mu, sigma):
    """Unit-normalised Gaussian PDF evaluated at x."""
    return 1.0 / np.sqrt(2 * np.pi) / np.abs(sigma) * np.exp(-(x - mu)**2 / 2.0 / sigma**2)

# Normalized Exponential:
def exponential_pdf(x, tau):
    """Unit-normalised exponential PDF (mean lifetime tau) evaluated at x."""
    return 1.0 / tau * np.exp(-x / tau)

# Full model PDFs: one for the ChiSquare fit [CS] (scaled by binwidth, since it is
# compared directly to bin counts), one for the Binned Likelihood [BL], and one for
# the Unbinned Likelihood [UL]:
if IncludeSignalInFit:
    def PDFmodel_CS(x, Nsig, mu, sigma, Nbkg, tau):
        return Nbkg * binwidth * exponential_pdf(x, tau) + Nsig * binwidth * gauss_pdf(x, mu, sigma)
    def PDFmodel_BL(x, Nsig, mu, sigma, Nbkg, tau):
        return Nbkg * exponential_pdf(x, tau) + Nsig * gauss_pdf(x, mu, sigma)
    def PDFmodel_UL(x, Nsig, mu, sigma, Nbkg, tau):
        return Nbkg * exponential_pdf(x, tau) + Nsig * gauss_pdf(x, mu, sigma)
else:
    def PDFmodel_CS(x, Nbkg, tau):
        return Nbkg * binwidth * exponential_pdf(x, tau)
    def PDFmodel_BL(x, Nbkg, tau):
        return Nbkg * exponential_pdf(x, tau)
    def PDFmodel_UL(x, Nbkg, tau):
        return Nbkg * exponential_pdf(x, tau)

# ----------------------------------------------------------------------------------- #
# Plot the data:
# ----------------------------------------------------------------------------------- #
fig, ax = plt.subplots(figsize=(15, 6))

# For a normal histogram (without error bars) one would use:
#   hist_data = ax.hist(x_data, bins=Nbins, range=(xmin, xmax), histtype='step', linewidth=2, label='Data', color='blue')

# We want to plot the histogram with error bars (also used for the Chi2 below).
# NOTE: the 'normed' keyword was removed from np.histogram in NumPy 1.24;
# 'density=False' is the equivalent (and default) behaviour.
y, bin_edges = np.histogram(x_data, bins=Nbins, range=(xmin, xmax), density=False)
x = 0.5 * (bin_edges[1:] + bin_edges[:-1])    # Bin centres
sy = np.sqrt(y)                               # Poisson errors on the bin counts
hist_data = ax.errorbar(x, y, sy, fmt='.', linewidth=2, label="Data")
ax.set(xlabel="Photon invariant mass [GeV]",
       ylabel="Frequency / {0:1.0f} GeV".format(binwidth),
       title="Distribution of diphoton invariant masses",
       xlim=(xmin, xmax))

# ----------------------------------------------------------------------------------- #
# Fit the data:
# ----------------------------------------------------------------------------------- #
# NOTE, that for the starting values of the fit, we here (to begin with) use the values
# the data was generated with! That is of course a great advantage, and one of the main
# challenges in general fitting is to find these good starting values.

# ChiSquare fit:
# --------------
# Do the fit with a Chi2 minimisation (only using bins with entries):
chi2reg = Chi2Regression(PDFmodel_CS, x[y > 0], y[y > 0], sy[y > 0])
if IncludeSignalInFit:
    minuit_cs = Minuit(chi2reg, pedantic=False, print_level=0, Nsig=Nsigx, mu=mux, sigma=sigmax, Nbkg=Nbkgx, tau=taux)
else:
    minuit_cs = Minuit(chi2reg, pedantic=False, print_level=0, Nbkg=Nbkgx, tau=taux)
minuit_cs.migrad()                            # Perform the actual fit
if not minuit_cs.get_fmin().is_valid:         # Check if the fit converged
    print(" WARNING: The ChiSquare fit DID NOT converge!!!")
minuit_output = [minuit_cs.get_fmin(), minuit_cs.get_param_states()]   # Saved in case needed

if IncludeSignalInFit:
    csfit_Nsig, csfit_mu, csfit_sigma, csfit_Nbkg, csfit_tau = minuit_cs.args   # Fitted parameter values
else:
    csfit_Nbkg, csfit_tau = minuit_cs.args

Chi2_value_cs = minuit_cs.fval                # The Chi2 value at the minimum
NvarModel_cs = len(minuit_cs.args)
Ndof_cs = len(y[y > 0]) - NvarModel_cs
ProbChi2_cs = stats.chi2.sf(Chi2_value_cs, Ndof_cs)

for name in minuit_cs.parameters:
    print(" ChiSquare Fit result: {0} = {1:.5f} +/- {2:.5f}".format(name, minuit_cs.values[name], minuit_cs.errors[name]))

# Plot fit result on top of the histogram:
x_csfit = np.linspace(xmin, xmax, 1000)       # x-axis for the plot of the fitted function
# NOTE: PDFmodel_CS already contains the binwidth factor (it is fitted directly to bin
# counts), so unlike the BL/UL curves below it must NOT be multiplied by binwidth again.
# (The old extra factor was invisible here only because binwidth happened to be 1.0.)
if IncludeSignalInFit:
    y_csfit = PDFmodel_CS(x_csfit, csfit_Nsig, csfit_mu, csfit_sigma, csfit_Nbkg, csfit_tau)
else:
    y_csfit = PDFmodel_CS(x_csfit, csfit_Nbkg, csfit_tau)
# y_csfit = PDFmodel_CS(x_csfit, *minuit_cs.args)      # Equivalent one-liner
ax.plot(x_csfit, y_csfit, '-', color='red', linewidth=2, label='ChiSquare fit')

# Show the useful histogram and fit information:
if IncludeSignalInFit:
    names = ['Entries', 'Chi2/ndf', 'Prob', 'Nsig', 'mu', 'sigma', 'Nbkg', 'tau']
    values = ["{:d}".format(len(x_data)),
              "{:.3f} / {:d}".format(Chi2_value_cs, Ndof_cs),
              "{:.3f}".format(ProbChi2_cs),
              "{:.3f} +/- {:.3f}".format(minuit_cs.values['Nsig'], minuit_cs.errors['Nsig']),
              "{:.3f} +/- {:.3f}".format(minuit_cs.values['mu'], minuit_cs.errors['mu']),
              "{:.3f} +/- {:.3f}".format(minuit_cs.values['sigma'], minuit_cs.errors['sigma']),
              "{:.3f} +/- {:.3f}".format(minuit_cs.values['Nbkg'], minuit_cs.errors['Nbkg']),
              "{:.3f} +/- {:.3f}".format(minuit_cs.values['tau'], minuit_cs.errors['tau']),
              ]
else:
    names = ['Entries', 'Chi2/ndf', 'Prob', 'Nbkg', 'tau']
    values = ["{:d}".format(len(x_data)),
              "{:.3f} / {:d}".format(Chi2_value_cs, Ndof_cs),
              "{:.3f}".format(ProbChi2_cs),
              "{:.3f} +/- {:.3f}".format(minuit_cs.values['Nbkg'], minuit_cs.errors['Nbkg']),
              "{:.3f} +/- {:.3f}".format(minuit_cs.values['tau'], minuit_cs.errors['tau']),
              ]
ax.text(0.02, 0.95, nice_string_output(names, values, 0), family='monospace',
        transform=ax.transAxes, fontsize=9, color='red', verticalalignment='top')

# Binned Likelihood fit:
# ----------------------
binned_likelihood = BinnedLH(PDFmodel_BL, x_data, bins=Nbins, bound=(xmin, xmax), extended=True)
if IncludeSignalInFit:
    minuit_bl = Minuit(binned_likelihood, pedantic=False, print_level=0, Nsig=Nsigx, mu=mux, sigma=sigmax, Nbkg=Nbkgx, tau=taux)
else:
    minuit_bl = Minuit(binned_likelihood, pedantic=False, print_level=0, Nbkg=Nbkgx, tau=taux)
minuit_bl.migrad()                            # Perform the actual fit
if not minuit_bl.get_fmin().is_valid:         # Check if the fit converged
    print(" WARNING: The binned likelihood fit DID NOT converge!!!")
minuit_output = [minuit_bl.get_fmin(), minuit_bl.get_param_states()]   # Saved in case needed

BLLH_value = minuit_bl.fval                   # The (binned) LLH value at the minimum
for name in minuit_bl.parameters:
    print(" Binned LLH Fit result: {0} = {1:.5f} +/- {2:.5f}".format(name, minuit_bl.values[name], minuit_bl.errors[name]))

# Plot fit result on top of the histogram (BL model is a density: scale by binwidth):
x_blfit = np.linspace(xmin, xmax, 1000)
y_blfit = binwidth * PDFmodel_BL(x_blfit, *minuit_bl.args)
ax.plot(x_blfit, y_blfit, '-', color='green', linewidth=2, label='Binned LLH fit')

# Chi2 of the LLH result -- for illustration only (see header note):
Chi2_value_bl, Ndof_bl = calc_chi2(x, y, sy, PDFmodel_BL, minuit_bl.args, binwidth)
ProbChi2_bl = stats.chi2.sf(Chi2_value_bl, Ndof_bl)

# Show the useful histogram and fit information:
if IncludeSignalInFit:
    names = ['Entries', 'Chi2/ndf', 'Prob', 'Nsig', 'mu', 'sigma', 'Nbkg', 'tau']
    values = ["{:d}".format(len(x_data)),
              "{:.3f} / {:d}".format(Chi2_value_bl, Ndof_bl),
              "{:.3f}".format(ProbChi2_bl),
              "{:.3f} +/- {:.3f}".format(minuit_bl.values['Nsig'], minuit_bl.errors['Nsig']),
              "{:.3f} +/- {:.3f}".format(minuit_bl.values['mu'], minuit_bl.errors['mu']),
              "{:.3f} +/- {:.3f}".format(minuit_bl.values['sigma'], minuit_bl.errors['sigma']),
              "{:.3f} +/- {:.3f}".format(minuit_bl.values['Nbkg'], minuit_bl.errors['Nbkg']),
              "{:.3f} +/- {:.3f}".format(minuit_bl.values['tau'], minuit_bl.errors['tau']),
              ]
else:
    names = ['Entries', 'Chi2/ndf', 'Prob', 'Nbkg', 'tau']
    values = ["{:d}".format(len(x_data)),
              "{:.3f} / {:d}".format(Chi2_value_bl, Ndof_bl),
              "{:.3f}".format(ProbChi2_bl),
              "{:.3f} +/- {:.3f}".format(minuit_bl.values['Nbkg'], minuit_bl.errors['Nbkg']),
              "{:.3f} +/- {:.3f}".format(minuit_bl.values['tau'], minuit_bl.errors['tau']),
              ]
ax.text(0.2, 0.95, nice_string_output(names, values, 0), family='monospace',
        transform=ax.transAxes, fontsize=9, color='green', verticalalignment='top')

# Unbinned Likelihood fit:
# ----------------------
unbinned_likelihood = UnbinnedLH(PDFmodel_UL, x_data, extended=True)
if IncludeSignalInFit:
    minuit_ul = Minuit(unbinned_likelihood, pedantic=False, print_level=0, Nsig=Nsigx, mu=mux, sigma=sigmax, Nbkg=Nbkgx, tau=taux)
else:
    minuit_ul = Minuit(unbinned_likelihood, pedantic=False, print_level=0, Nbkg=Nbkgx, tau=taux)
minuit_ul.migrad()                            # Perform the actual fit
if not minuit_ul.get_fmin().is_valid:         # Check if the fit converged
    print(" WARNING: The unbinned likelihood fit DID NOT converge!!!")
minuit_output = [minuit_ul.get_fmin(), minuit_ul.get_param_states()]   # Saved in case needed

ULLH_value = minuit_ul.fval                   # The (unbinned) LLH value at the minimum
for name in minuit_ul.parameters:
    print(" Unbinned LLH Fit result: {0} = {1:.5f} +/- {2:.5f}".format(name, minuit_ul.values[name], minuit_ul.errors[name]))

# Plot fit result on top of the histogram (UL model is a density: scale by binwidth):
x_ulfit = np.linspace(xmin, xmax, 1000)
y_ulfit = binwidth * PDFmodel_UL(x_ulfit, *minuit_ul.args)
ax.plot(x_ulfit, y_ulfit, '--', color='black', linewidth=2, label='Unbinned LLH fit')

# Chi2 of the LLH result -- for illustration only (see header note):
Chi2_value_ul, Ndof_ul = calc_chi2(x, y, sy, PDFmodel_UL, minuit_ul.args, binwidth)
ProbChi2_ul = stats.chi2.sf(Chi2_value_ul, Ndof_ul)

# Show the useful histogram and fit information:
if IncludeSignalInFit:
    names = ['Entries', 'Chi2/ndf', 'Prob', 'Nsig', 'mu', 'sigma', 'Nbkg', 'tau']
    values = ["{:d}".format(len(x_data)),
              "{:.3f} / {:d}".format(Chi2_value_ul, Ndof_ul),
              "{:.3f}".format(ProbChi2_ul),
              "{:.3f} +/- {:.3f}".format(minuit_ul.values['Nsig'], minuit_ul.errors['Nsig']),
              "{:.3f} +/- {:.3f}".format(minuit_ul.values['mu'], minuit_ul.errors['mu']),
              "{:.3f} +/- {:.3f}".format(minuit_ul.values['sigma'], minuit_ul.errors['sigma']),
              "{:.3f} +/- {:.3f}".format(minuit_ul.values['Nbkg'], minuit_ul.errors['Nbkg']),
              "{:.3f} +/- {:.3f}".format(minuit_ul.values['tau'], minuit_ul.errors['tau']),
              ]
else:
    names = ['Entries', 'Chi2/ndf', 'Prob', 'Nbkg', 'tau']
    values = ["{:d}".format(len(x_data)),
              "{:.3f} / {:d}".format(Chi2_value_ul, Ndof_ul),
              "{:.3f}".format(ProbChi2_ul),
              "{:.3f} +/- {:.3f}".format(minuit_ul.values['Nbkg'], minuit_ul.errors['Nbkg']),
              "{:.3f} +/- {:.3f}".format(minuit_ul.values['tau'], minuit_ul.errors['tau']),
              ]
ax.text(0.4, 0.95, nice_string_output(names, values, 0), family='monospace',
        transform=ax.transAxes, fontsize=9, color='black', verticalalignment='top')

fig.tight_layout()
ax.legend()
plt.show(block=False)
if Saveplots:
    fig.savefig("ExampleLikelihoodFit", dpi=600)

# Finally, ensure that the program does not terminate (and the plot disappears),
# before you press enter.
# NOTE: the original used a bare `raw_input`, which raises NameError on Python 3
# (this file explicitly supports both 2 and 3 via the __future__ imports above).
try:
    __IPYTHON__
except NameError:
    try:
        raw_input('Press Enter to exit')      # Python 2
    except NameError:
        input('Press Enter to exit')          # Python 3

# ---------------------------------------------------------------------------------- #
#
# This exercise is meant to improve your understanding but also your experience with
# ChiSquare and Likelihood fits. The below are indicative questions, but you should
# also "play around" yourself to see, if you understand how the three behave, when you
# change conditions.
#
# Questions:
# ----------
# 1) At first run the program without a signal peak. What is the main difference between
#    the three fits? Do they give the same values? The same errors? And is the model good?
#    Also, try to lower the "background lifetime" (taux) to get more empty bins.
#
# 2) Try to give "mu" an initial starting value in the fit away from the true one, and
#    see if the fit can find the position of the signal peak and hence converge.
#    In general, how far can the initial parameter values stray from the true ones,
#    and still find the true minimum? When does it still converge?
#
# 3) Try to put fewer events in the signal peak, and see at what point you can no
#    longer see the peak by eye. Then consider the significance (i.e. the number of
#    sigma's) of Nsig. Does the size of this correspond well with your observation?
#
# 4) What helps the most to discover a signal peak:
#    - Removing half the background below the peak.
# - Improving the resolution (sigma) by a factor two. # Discuss first what you think (and why), and then try it out with this fit. # # Advanced questions: # ------------------- # 1) Make a loop to repeat the two fits many times, and see how the result of the # fit compares to the true values. Which fit is least biased? And does this depend # on the value of the input parameters? # # 2) The likelihood value does not in itself tell you much. But if repeated many times # (as above) it can be used to evaluate the fit at hand. Try to produce new data and # fit it 1000 times, and see the distribution of likelihood values obtained... and # then compare the "original data LLH value" with this distribution! From this, one # can actually obtain a probability for the LLH in the same way as for the ChiSquare. # #----------------------------------------------------------------------------------