#!/usr/bin/env python

# ----------------------------------------------------------------------------------- #
# 
#   This Python/ROOT macro illustrates the use of ChiSquare as a goodness-of-fit measure,
#   how this distribution comes about, and that it actually works, here with three
#   different examples!
#
#   References:
#     Barlow: Chapter 6
#     Cowan: Chapter 2.7, Chapter 7
#     Bevington: Chapter 6
# 
#   Author: Troels C. Petersen (NBI)
#   Email:  petersen@nbi.dk
#   Date:   1st of September 2014
# 
# ----------------------------------------------------------------------------------- #

from ROOT import *
from array import array
from math import sqrt

# ----------------------------------------------------------------------------------- #
def sqr( a ) : 
    return a*a
# ----------------------------------------------------------------------------------- #


# ----------------------------------------------------------------------------------- #
# Chi Squared Test - with three different examples
# ----------------------------------------------------------------------------------- #

gROOT.Reset()

# Setting of general plotting style:
gStyle.SetCanvasColor(0)
gStyle.SetFillColor(0)

# Settings for statistics box:
gStyle.SetOptStat(1111)
gStyle.SetOptFit(1111)

# Random numbers from the Mersenne-Twister:
r = TRandom3()
r.SetSeed(3)    # Initializing the random numbers using 1 as seed. Put "0" to use the time (i.e. random)!

# Text to write on the plot:
text = TLatex()
text.SetNDC()
text.SetTextFont(1)
text.SetTextColor(1)

SavePlots = False

Nexp = 1


# ------------------------------------------------------------------ #
# Generate and fit LINEAR data:
# ------------------------------------------------------------------ #

NpointsLin = 17

# Create arrays for graph (needed by ROOT):
xLin  = array( 'f', [0.0]*NpointsLin )   # Create an array of length Npoints with zeros.
exLin = array( 'f', [0.0]*NpointsLin )   # [X]*N is just a python syntax to initialize
yLin  = array( 'f', [0.0]*NpointsLin )   # a list of values X.
eyLin = array( 'f', [0.0]*NpointsLin )   

alpha0 = 3.6
alpha1 = 0.3
sigmay = 1.0

Hist_Chi2_Lin   = TH1F("Hist_Chi2_Lin", "Hist_Chi2_Lin",  30,  0.0, 60.0)
Hist_Prob_Lin   = TH1F("Hist_Prob_Lin", "Hist_Prob_Lin",  20,  0.0,  1.0)


# Loop over number of experiments to generate data and subsequent Chi2 values:
for iexp in range( Nexp ) : 

    # Generate points:
    for i in range( NpointsLin ) : 
        xLin[i]  = float( i+1 )
        exLin[i] = 0.0
        yLin[i]  = alpha0 + alpha1 * xLin[i] + r.Gaus(0.0, sigmay)
        eyLin[i] = sigmay

    # Fit points:
    graphLin = TGraphErrors(NpointsLin, xLin, yLin, exLin, eyLin)
    fitLin   = TF1("fitLin", "[0] + [1]*x", 0.5, float(NpointsLin)+0.5)
    fitLin.SetParameters(alpha0, alpha1)
    graphLin.Fit("fitLin","RQ")               # The "Q" option means "Quiet", i.e. don't print or plot anything!
    Hist_Chi2_Lin.Fill(fitLin.GetChisquare())
    Hist_Prob_Lin.Fill(fitLin.GetProb())


# ------------------------------------------------------------------ #
# Generate and fit OSCILLATING data:
# ------------------------------------------------------------------ #

NpointsOsc = 19

# Create arrays for graph (needed by ROOT):
xOsc  = array( 'f', [0.0]*NpointsOsc )   # Create an array of length Npoints with zeros.
exOsc = array( 'f', [0.0]*NpointsOsc )   # [X]*N is just a python syntax to initialize
yOsc  = array( 'f', [0.0]*NpointsOsc )   # a list of values X.
eyOsc = array( 'f', [0.0]*NpointsOsc )   

mean  = 1.6
amp   = 3.3
omega = 0.7
phase = 0.3
sigmay = 0.5

Hist_Chi2_Osc   = TH1F("Hist_Chi2_Osc", "Hist_Chi2_Osc",  30,  0.0, 60.0)
Hist_Prob_Osc   = TH1F("Hist_Prob_Osc", "Hist_Prob_Osc",  20,  0.0,  1.0)


# Loop over number of experiments to generate data and subsequent Chi2 values:
for iexp in range( Nexp ) : 

    # Generate points:
    for i in range( NpointsOsc ) : 
        xOsc[i]  = float( i+1 )
        exOsc[i] = 0.0
        yOsc[i]  = mean + amp * cos(omega*xOsc[i] + phase) + r.Gaus(0.0, sigmay)
        eyOsc[i] = sigmay

    # Fit points:
    graphOsc = TGraphErrors(NpointsOsc, xOsc, yOsc, exOsc, eyOsc)
    fitOsc   = TF1("fitOsc", "[0] + [1]*cos([2]*x+[3])", 0.5, float(NpointsOsc)+0.5)
    fitOsc.SetParameters(mean, amp, omega, phase)
    graphOsc.Fit("fitOsc","RQ")               # The "Q" option means "Quiet", i.e. don't print or plot anything!
    Hist_Chi2_Osc.Fill(fitOsc.GetChisquare())
    Hist_Prob_Osc.Fill(fitOsc.GetProb())


# ------------------------------------------------------------------ #
# Generate and fit EXPONENTIAL binned data:
# ------------------------------------------------------------------ #

NpointsExp = 1000
NbinsExp = 17
Hist_Exp = TH1F("Hist_Exp", "Hist_Exp", NbinsExp, 0.0, 17.0)

tau = 3.1

Hist_Chi2_Exp   = TH1F("Hist_Chi2_Exp", "Hist_Chi2_Exp",  30,  0.0, 60.0)
Hist_Prob_Exp   = TH1F("Hist_Prob_Exp", "Hist_Prob_Exp",  20,  0.0,  1.0)


# Loop over number of experiments to generate data and subsequent Chi2 values:
for iexp in range( Nexp ) : 

    # Generate points:
    Hist_Exp.Reset()
    for i in range( NpointsExp ) : 
        Hist_Exp.Fill(r.Exp(tau))

    # Fit points:
    fitExp = TF1("fitExp", "[0]*exp(-x/[1])", 0.0, float(NbinsExp))
    fitExp.SetParameters(NpointsExp, tau)
    Hist_Exp.Fit("fitExp","RQN")               # "R": fit in Range, "Q": Quiet (no printing!), "N": No plot.
    Hist_Chi2_Exp.Fill(fitExp.GetChisquare())
    Hist_Prob_Exp.Fill(fitExp.GetProb())


# ------------------------------------------------------------------ #
# Fit the data and plot the result:
# ------------------------------------------------------------------ #

# Make a white canvas and draw an example fit in:
c0 = TCanvas("c0", "", 100, 20, 1200, 500)
c0.Divide(3,1)

c0.cd(1)
graphLin.SetLineWidth(2)
graphLin.SetMarkerStyle(20)
graphLin.SetMarkerColor(2)
graphLin.Draw("AP")

c0.cd(2)
graphOsc.SetLineWidth(2)
graphOsc.SetMarkerStyle(20)
graphOsc.SetMarkerColor(2)
graphOsc.Draw("AP")

c0.cd(3)
Hist_Exp.SetLineWidth(2)
fitExp2 = TF1("fitExp2", "[0]*exp(-x/[1])", 0.0, float(NbinsExp))
fitExp2.SetParameters(NpointsExp, tau)
Hist_Exp.Fit("fitExp2","R")
Hist_Exp.Draw("e")             # "e" stands for "with errors"

# Save to file:
c0.Update()
if (SavePlots) : c0.SaveAs("Chi2Dist_SeveralExamples.pdf")


# If there have been more than one experiment, then make another white canvas:
if (Nexp > 1) : 
    c1 = TCanvas("c1", "", 150, 50, 1200, 500)
    c1.Divide(3,1)                  # Divide the canvas into 2-by-2 subcanvas
 
    c1.cd(1)                            # Go to the first subcanvas
    Hist_Chi2_Lin.Sumw2()               # Ensure that ROOT calculates/propagate the uncertainties correctly.
    Hist_Chi2_Lin.DrawNormalized("e")   # Here I draw it normalized, so that the function will match!
    f1a = TF1("f1a","2.0*ROOT::Math::chisquared_pdf(x,15)", 0, 60)     # Chi-square function with Ndof=15
    f1a.Draw("same")                    # Draw it on top of the histogram.

    c1.cd(2)                            # Go to the second subcanvas
    Hist_Chi2_Osc.Sumw2()               # Ensure that ROOT calculates/propagate the uncertainties correctly.
    Hist_Chi2_Osc.DrawNormalized("e")   # Here I draw it normalized, so that the function will match!
    f1b = TF1("f1b","2.0*ROOT::Math::chisquared_pdf(x,15)", 0, 60)     # Chi-square function with Ndof=15
    f1b.Draw("same")                    # Draw it on top of the histogram.

    c1.cd(3)                            # Go to the third subcanvas
    Hist_Chi2_Exp.Sumw2()               # Ensure that ROOT calculates/propagate the uncertainties correctly.
    Hist_Chi2_Exp.DrawNormalized("e")   # Here I draw it normalized, so that the function will match!
    f1c = TF1("f1c","2.0*ROOT::Math::chisquared_pdf(x,15)", 0, 60)     # Chi-square function with Ndof=15
    f1c.Draw("same")                    # Draw it on top of the histogram.

    # Save to file:
    c1.Update()
    if (SavePlots) : c1.SaveAs("Chi2Dist_SeveralCases.pdf")


raw_input( ' ... Press enter to exit ... ' )


#---------------------------------------------------------------------------------- 
# 
# Make sure you've read the relevant references and that you understand not only what
# the ChiSquare is, but also that it follows the ChiSquare distribution, and that the
# probability of obtaining such a ChiSquare or worse can be calculated from it.
# 
# The program generates a certain number of datasets in three different ways, each
# with 15 degrees of freedom in the fit, from which the Chi2 of the fit is recorded.
# 
# 
# Questions:
# ----------
#  1) In the example of the linear fit, what number of points lies outside +-1 sigma?
#     Is that a reasonable number?
#
#  2) In the oscillatory case, try to drop the line with "SetParameters", and see how
#     well the fit goes, when it does not have good starting values.
#
#  3) In the exponential fit, where do the uncertainties come from? And is the fit
#     reasonable?
# 
# 
# Advanced questions:
# -------------------
#  1) Why does the last of the three Chi2 distributions not fit quite?
#     Try to change the number of generated points to 100 instead of 1000,
#     and/or change the lifetime to tau=2.1. Does this increase the mismatch
#     of the Chi2 distribution. Does that give you a hint why?
# 
#----------------------------------------------------------------------------------