#!/usr/bin/env python

# ----------------------------------------------------------------------------------- #
# 
#   This Python/ROOT macro illustrates how to do a linear fit analytically.
# 
#   References:
#     Bevington: Chapter 6
# 
#   Author: Troels C. Petersen (NBI)
#   Email:  petersen@nbi.dk
#   Date:   17th of November 2016
# 
# ----------------------------------------------------------------------------------- #

from ROOT import *
from array import array
from math import sqrt

# ----------------------------------------------------------------------------------- #
def sqr( a ) : 
    return a*a
# ----------------------------------------------------------------------------------- #


# ----------------------------------------------------------------------------------- #
# Analytic LINEAR fit:
# ----------------------------------------------------------------------------------- #

# Settings for statistics box:
gStyle.SetOptStat(1111)
gStyle.SetOptFit(1111)

# Random numbers from the Mersenne-Twister:
r = TRandom3()
r.SetSeed(1)    # Initializing the random numbers using 1 as seed. Put "0" to use the time (i.e. random)!


# ------------------------------------------------------------------ #
# Loop over generating and fitting data:
# ------------------------------------------------------------------ #

Npoints = 9

# Create arrays for graph (needed by ROOT):
x  = array( 'f', [0.0]*Npoints )   # Create an array of length Npoints with zeros.
ex = array( 'f', [0.0]*Npoints )   # [X]*N is just a python syntax to initialize
y  = array( 'f', [0.0]*Npoints )   # a list of values X.
ey = array( 'f', [0.0]*Npoints )   

alpha0 = 3.6
alpha1 = 0.3
sigmay = 1.0

# Generate data:
# --------------
for i in range( Npoints ) : 
    x[i]  = float( i+1 )
    ex[i] = 0.0
    y[i]  = alpha0 + alpha1 * x[i] + r.Gaus( 0.0 ,sigmay )
    ey[i] = sigmay
    

# ----------------------
# Fit data analytically:
# ----------------------
# The linear fit can be done analytically, as is outlined here:
#   http://www.nbi.dk/~petersen/Teaching/Stat2013/StraightLineFit.pdf

# Define the sums needed for the calculation:
s   = 0.0
sx  = 0.0
sxx = 0.0
sy  = 0.0
sxy = 0.0

# Loop over data to calculate relevant sums:
for i in range( Npoints ) : 
    s   += 1.0
    sx  += x[i]
    sxx += sqr(x[i])
    sy  += y[i]
    sxy += x[i] * y[i]

# Use sums in calculations:
Delta = sxx * s - sqr(sx)
alpha0_calc = (sy  * sxx - sxy * sx) / Delta
alpha1_calc = (sxy * s   - sy  * sx) / Delta
sigma_alpha0_calc = sigmay * sqrt(sxx / Delta)
sigma_alpha1_calc = sigmay * sqrt(s   / Delta)

# So now you have data points and a fit/theory. How to get the fit quality?
# The answer is to calculate the Chi2 and Ndof, and from them get their
# probability using a function (e.g. from ROOT):
Chi2_calc = 0.0
for i in range( Npoints ) : 
    fit_value = alpha0_calc + alpha1_calc * x[i]
    Chi2_calc += sqr((y[i] - fit_value) / ey[i])

Nvar = 2                     # Number of variables (alpha0 and alpha1)
Ndof_calc = Npoints - Nvar   # Number of degrees of freedom

# From Chi2 and Ndof, one can calculate the probability of obtaining this
# or something worse (i.e. higher Chi2). This is a good function to have!
Prob_calc = TMath.Prob( Chi2_calc, Ndof_calc )


# -----------------------------------------
# Fit data numerically (i.e. using Minuit):
# -----------------------------------------
# This is the same fit, but now done numerically and iteratively by Minuit in ROOT:
# It is shorter, but a lot slower!
graph = TGraphErrors( Npoints,x,y,ex,ey )
fit   = TF1("fit", "[0] + [1]*x", 0.5, float( Npoints )+0.5 )
graph.Fit("fit","R")               # The "R" option means fit in the range of the function!

alpha0_fit = fit.GetParameter(0)
alpha1_fit = fit.GetParameter(1)
sigma_alpha0_fit = fit.GetParError(0)
sigma_alpha1_fit = fit.GetParError(1)

# In ROOT, you can just ask the fit function for it:
Chi2_fit = fit.GetChisquare()
Ndof_fit = fit.GetNDF()
Prob_fit = fit.GetProb()

# Let us see, if the calculated and the fitted values agree:
print "  Calc:%6.3f+-%5.3f  %5.3f+-%5.3f   p=%6.4f    Fit:%6.3f+-%5.3f  %5.3f+-%5.3f   p=%6.4f"%(
    alpha0_calc, sigma_alpha0_calc, alpha1_calc, sigma_alpha1_calc, Prob_calc,
    alpha0_fit,  sigma_alpha0_fit,  alpha1_fit,  sigma_alpha1_fit,  Prob_fit)


raw_input( ' ... Press enter to exit ... ' )


#---------------------------------------------------------------------------------- 
# 
# Make sure you've read the relevant references about the ANALYTICAL linear fit,
# and see if you understand the implementation above.
# 
#----------------------------------------------------------------------------------