#!/usr/bin/env python
# ----------------------------------------------------------------------------------- #
#
# This Python macro illustrates the use of ChiSquare as a goodness-of-fit measure,
# how this distribution comes about, and that it actually works! At the same time, it
# is also an illustration of how to do a linear fit analytically, that is without
# running a minimisation algorithm (in our case called Minuit).
#
# The program generates a certain number of datasets, each consisting of 9 points along
# a line. These are then fitted (both analytically and numerically), and the result and
# the Chi2 of the fit are recorded along with the probability of the fit.
#
# References:
#   Barlow: Chapter 6
#   Cowan: Chapter 2.7, Chapter 7
#   Bevington: Chapter 6
#
# Author: Troels C. Petersen (NBI)
# Email:  petersen@nbi.dk
# Date:   12th of November 2017
#
# ----------------------------------------------------------------------------------- #

from __future__ import division, print_function
import numpy as np
import matplotlib.pyplot as plt
from iminuit import Minuit
from probfit import Chi2Regression

plt.close('all')

# Function to create a nice string output:
def nice_string_output(names, values, extra_spacing=2):
    max_values = len(max(values, key=len))
    max_names = len(max(names, key=len))
    string = ""
    for name, value in zip(names, values):
        string += "{0:s} {1:>{spacing}} \n".format(name, value,
                                                   spacing=extra_spacing + max_values + max_names - len(name))
    return string[:-2]


# ----------------------------------------------------------------------------------- #
# Chi Squared Test
# ----------------------------------------------------------------------------------- #

SavePlots = True

r = np.random    # Random generator
r.seed(42)       # Set a random seed (but a fixed one - more on that later.)


# ------------------------------------------------------------------ #
# Loop over generating and fitting data:
# ------------------------------------------------------------------ #

Nexp = 1
Npoints = 9

# Parameters used in this problem:
alpha0 = 3.6
alpha1 = 0.3
alpha2 = 0.1
sigmay = 1.0

array_alpha0 = np.zeros(Nexp)
array_alpha1 = np.zeros(Nexp)
array_Chi2 = np.zeros(Nexp)
array_Prob = np.zeros(Nexp)

for iexp in range(Nexp):

    # Generate data:
    # --------------
    x = np.arange(Npoints) + 1
    ex = np.zeros_like(x)
    y = alpha0 + alpha1*x + r.normal(0, sigmay, Npoints)
    ey = sigmay * np.ones_like(x)

    # ----------------------
    # Fit data analytically:
    # ----------------------
    # The linear fit can be done analytically, as is outlined here:
    # http://www.nbi.dk/~petersen/Teaching/Stat2017/StraightLineFit.pdf

    # Calculate the relevant sums:
    s = len(x)
    sx = np.sum(x)
    sxx = np.sum(x**2)
    sy = np.sum(y)
    sxy = np.sum(x*y)

    # Use the sums in the calculations:
    Delta = sxx * s - sx**2
    alpha0_calc = (sy * sxx - sxy * sx) / Delta
    alpha1_calc = (sxy * s - sy * sx) / Delta
    sigma_alpha0_calc = sigmay * np.sqrt(sxx / Delta)
    sigma_alpha1_calc = sigmay * np.sqrt(s / Delta)
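    # Cross-check (a minimal sketch, not part of the original macro): since all
    # points here have the same uncertainty, the Chi2 minimum coincides with the
    # ordinary least-squares solution, so NumPy's polyfit should reproduce the
    # analytic result above (polyfit returns the highest power first):
    alpha1_np, alpha0_np = np.polyfit(x, y, 1)
    assert np.isclose(alpha0_np, alpha0_calc) and np.isclose(alpha1_np, alpha1_calc)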
    # So now you have data points and a fit/theory. How to get the fit quality?
    # The answer is to calculate the Chi2 and Ndof, and from them get their
    # probability using a function (e.g. from scipy):
    Chi2_calc = 0.0
    for i in range(Npoints):
        fit_value = alpha0_calc + alpha1_calc * x[i]
        Chi2_calc += ((y[i] - fit_value) / ey[i])**2

    Nvar = 2                      # Number of variables (alpha0 and alpha1)
    Ndof_calc = Npoints - Nvar    # Number of degrees of freedom

    # From Chi2 and Ndof, one can calculate the probability of obtaining this
    # or something worse (i.e. a higher Chi2). This is a good function to have!
    from scipy import stats
    Prob_calc = stats.chi2.sf(Chi2_calc, Ndof_calc)    # The chi2 probability given Ndof degrees of freedom

    # ------------------------------------------
    # Fit data numerically (i.e. using Minuit):
    # ------------------------------------------
    # This is the same fit, but now done numerically and iteratively by Minuit.
    # It is shorter to write, but a lot slower to run!
    def fit_function(x, alpha0, alpha1):
        return alpha0 + alpha1*x

    chi2_object = Chi2Regression(fit_function, x, y, ey)
    minuit = Minuit(chi2_object, pedantic=False, alpha0=1, alpha1=1, print_level=0)
    minuit.migrad()                                                   # Perform the actual fit
    minuit_output = [minuit.get_fmin(), minuit.get_param_states()]    # Save the output parameters in case needed

    alpha0_fit = minuit.values['alpha0']
    alpha1_fit = minuit.values['alpha1']
    sigma_alpha0_fit = minuit.errors['alpha0']
    sigma_alpha1_fit = minuit.errors['alpha1']

    # In Minuit, you can just ask the fit object for the Chi2:
    Chi2_fit = minuit.fval                           # The chi2 value
    Prob_fit = stats.chi2.sf(Chi2_fit, Ndof_calc)    # The chi2 probability given Ndof degrees of freedom

    # Let us see if the calculated and the fitted values agree:
    if (iexp < 25):
        print(" Calc:{:6.3f}+-{:5.3f} {:5.3f}+-{:5.3f} p={:6.4f} Fit:{:6.3f}+-{:5.3f} {:5.3f}+-{:5.3f} p={:6.4f}".format(
            alpha0_calc, sigma_alpha0_calc, alpha1_calc, sigma_alpha1_calc, Prob_calc,
            alpha0_fit, sigma_alpha0_fit, alpha1_fit, sigma_alpha1_fit, Prob_fit))

    # Fill histograms with fit results:
    array_alpha0[iexp] = alpha0_calc
    array_alpha1[iexp] = alpha1_calc
    array_Chi2[iexp] = Chi2_fit
    array_Prob[iexp] = Prob_fit


# ------------------------------------------------------------------ #
# Fit the data and plot the result:
# ------------------------------------------------------------------ #

fig, ax = plt.subplots()
ax.errorbar(x, y, ey, fmt='ro', ecolor='k', elinewidth=1, capsize=2, capthick=1)
ax.plot(x, fit_function(x, *minuit.args), '-r')

names = ['Chi2/ndf', 'Prob', 'p0', 'p1']
values = ["{:.3f} / {:d}".format(Chi2_fit, Ndof_calc),
          "{:.3f}".format(Prob_fit),
          "{:.3f} +/- {:.3f}".format(alpha0_fit, sigma_alpha0_fit),
          "{:.3f} +/- {:.3f}".format(alpha1_fit, sigma_alpha1_fit),
          ]
ax.text(0.55, 0.2, nice_string_output(names, values), family='monospace',
        transform=ax.transAxes, fontsize=10, verticalalignment='top')

plt.tight_layout()
plt.show(block=False)

if (SavePlots):
    fig.savefig("FitToLine.pdf")
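# Side note (a sketch, not part of the original macro): stats.chi2.sf is the
# "survival function", i.e. the integral of the Chi2 PDF from Chi2_fit to
# infinity. It equals 1 - stats.chi2.cdf, but is numerically more precise far
# out in the tail:
assert np.isclose(stats.chi2.sf(Chi2_fit, Ndof_calc),
                  1.0 - stats.chi2.cdf(Chi2_fit, Ndof_calc))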
# ------------------------------------------------------------------ #
# In case we have more than one "experiment" (i.e. fit), we would like to see
# how the fit results and the Chi2 values distribute themselves:
# ------------------------------------------------------------------ #

if (Nexp > 1):

    fig2, ax2 = plt.subplots(nrows=2, ncols=2, figsize=(10, 10))
    ax2 = ax2.flatten()

    ax2[0].hist(array_alpha0, bins=80, histtype='step')
    ax2[0].set_title('Histogram of alpha0')
    string = " Entries {:>9} \n Mean {:>12.3f} \n RMS {:>13.4f}".format(len(array_alpha0), array_alpha0.mean(), array_alpha0.std(ddof=1))
    ax2[0].text(0.62, 0.95, string, family='monospace', transform=ax2[0].transAxes, fontsize=10, verticalalignment='top')

    ax2[1].hist(array_alpha1, bins=80, histtype='step')
    ax2[1].set_title('Histogram of alpha1')
    string = " Entries {:>9} \n Mean {:>12.3f} \n RMS {:>13.4f}".format(len(array_alpha1), array_alpha1.mean(), array_alpha1.std(ddof=1))
    ax2[1].text(0.05, 0.95, string, family='monospace', transform=ax2[1].transAxes, fontsize=10, verticalalignment='top')

    ax2[2].hist(array_Chi2, bins=20, histtype='step', density=True)    # 'density' replaces the removed 'normed' keyword
    ax2[2].set_title('Histogram of Chi2')
    string = " Entries {:>9} \n Mean {:>12.3f} \n RMS {:>13.4f}".format(len(array_Chi2), array_Chi2.mean(), array_Chi2.std(ddof=1))
    ax2[2].text(0.62, 0.95, string, family='monospace', transform=ax2[2].transAxes, fontsize=10, verticalalignment='top')

    # Overlay the Chi2 PDF with Ndof_calc = 7 degrees of freedom (9 points minus 2 fit parameters):
    x_axis = np.linspace(0, 20, 1000)
    y_axis = stats.chi2.pdf(x_axis, df=Ndof_calc)
    ax2[2].plot(x_axis, y_axis, 'r-')
    ax2[2].text(0.15, 0.05, "Note: This is not a fit!", transform=ax2[2].transAxes, fontsize=10, verticalalignment='top')

    ax2[3].hist(array_Prob, bins=20, histtype='step')
    ax2[3].set_title('Histogram of Prob')
    string = " Entries {:>9} \n Mean {:>12.3f} \n RMS {:>13.4f}".format(len(array_Prob), array_Prob.mean(), array_Prob.std(ddof=1))
    ax2[3].text(0.3, 0.2, string, family='monospace', transform=ax2[3].transAxes, fontsize=10, verticalalignment='top')

    plt.tight_layout()
    plt.show(block=False)

    if (SavePlots):
        fig2.savefig('FitChi2dist.pdf')


# Numbers to illustrate the point that the ratio Chi2/Ndof is only a rule of thumb:
print("\n Considering the relation between the Chi2/Ndof ratio and Prob(Chi2,Ndof) vs. Ndof:")
print(" High number of degrees of freedom: Prob(240.0, 200) = %7.5f" % (stats.chi2.sf(240.0, 200)))
print(" Low number of degrees of freedom:  Prob(  2.4,   2) = %7.5f" % (stats.chi2.sf(2.4, 2)))
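# Side remark (a sketch, not part of the original macro): the Chi2 distribution
# has mean Ndof and variance 2*Ndof, so Chi2/Ndof scatters around 1 with a
# relative spread of sqrt(2/Ndof). This spread shrinks with Ndof, which is why
# the same ratio of 1.2 is unremarkable for Ndof = 2 but unlikely for Ndof = 200:
for ndof in (2, 200):
    print(" Ndof = {:3d}: mean = {:5.1f}, RMS = {:5.2f}".format(ndof, stats.chi2.mean(ndof), stats.chi2.std(ndof)))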
# Finally, ensure that the program does not terminate (and the plots disappear) before you press enter:
try:
    __IPYTHON__
except NameError:
    input('Press Enter to exit')    # In Python 2, use raw_input instead


# ----------------------------------------------------------------------------------- #
#
# Make sure you've read the relevant references and that you understand not only what
# the ChiSquare is, but also that it follows the ChiSquare distribution, and that the
# probability of obtaining such a ChiSquare or worse (given that the hypothesis/PDF
# is the correct one!) can be calculated from it.
#
# Executive Summary: The ChiSquare method is used both for getting the best fit,
# and for testing the quality/probability of this fit.
#
# Questions:
# ----------
# 1) Run the code such that it does exactly one fit, and take a look at the fitted line.
#    Does it look reasonable, and what is the chance that the input for the data could
#    actually be from a flat distribution, i.e. without the slope?
#
# 2) Does the fit reproduce the input numbers well (include the uncertainties in your
#    answer)? And do the analytical results (Calc) agree with the numerical ones (Fit)?
#
# 3) Now increase the number of experiments to e.g. 1000, and rerun the macro. Figure
#    out what you see in the window split in 2-by-2, and go through each of these plots
#    to see if you understand every feature of the distributions shown, and if you are
#    happy with them! Is this what you expected?
#    Try to make some fits of these output distributions to check if you're satisfied!
#
#    This somehow makes this the "long" question without any leading questions.
#    But you should try to be statistically minded enough, and have an idea of what to
#    look for, at least to some degree :-)
#
# 4) Investigate whether the distribution of probabilities is flat, or whether it has
#    some slope to it. Do you understand why the distribution of probabilities is flat?
#    (This is typically conceptually hard to begin with, but don't worry!)
#
# 5) Find the line where the random points for the line are generated, and add a
#    quadratic term in x with the coefficient 0.1 (see the sketch below these questions).
#    Run the program again with Nexp=1, and start by looking at the single fit in the
#    graph. Can you see this change? Now run 1000 experiments again, and see what has
#    changed in the distributions in the 2-by-2 window when including such a term, and
#    think about what consequences it might have for an experiment.
#
#    NOTE: The reason why the quadratic term is clearer in the latter case is NOT
#    that it is a better method. It is simply because there is 1000 times more statistics!
#
#
# Advanced questions:
# -------------------
# 1) Change the coefficient from question 5) to -0.2. Of course the linear fit then does
#    not do very well. What changes are needed for the fit to be good again? Make these
#    changes, and check that the condition in question 4) is again met.
#
# 2) On page 104 of Glen Cowan's book, he lists the conditions under which the ChiSquare
#    distribution is obtained (the hypothesis must be linear in the fit parameters). Try
#    to make a test where this is not the case (e.g. f(x) = cos(a*x)), and see to what
#    degree this requirement is actually needed.
#
# ----------------------------------------------------------------------------------- #
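# A minimal sketch for question 5 (not part of the original macro), reusing the
# alpha2 = 0.1 parameter already defined at the top. The data generation becomes:
#
#     y = alpha0 + alpha1*x + alpha2*x**2 + r.normal(0, sigmay, Npoints)
#
# And for the advanced questions, the fit function is extended correspondingly
# (the names fit_function2 etc. are illustrative, not from the original macro):
#
#     def fit_function2(x, alpha0, alpha1, alpha2):
#         return alpha0 + alpha1*x + alpha2*x**2
#
#     chi2_object2 = Chi2Regression(fit_function2, x, y, ey)
#     minuit2 = Minuit(chi2_object2, pedantic=False, alpha0=1, alpha1=1, alpha2=0, print_level=0)
#     minuit2.migrad()
#
# Remember that Ndof then becomes Npoints - 3, since one more parameter is fitted.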