#!/usr/bin/env python # ----------------------------------------------------------------------------------- # # # This Python/ROOT macro illustrates the use of ChiSquare as a goodness-of-fit measure, # how this distribution comes about, and that it actually works, here with three # different examples! # # References: # Barlow: Chapter 6 # Cowan: Chapter 2.7, Chapter 7 # Bevington: Chapter 6 # # Author: Troels C. Petersen (NBI) # Email: petersen@nbi.dk # Date: 1st of September 2014 # # ----------------------------------------------------------------------------------- # from ROOT import * from array import array from math import sqrt # ----------------------------------------------------------------------------------- # def sqr( a ) : return a*a # ----------------------------------------------------------------------------------- # # ----------------------------------------------------------------------------------- # # Chi Squared Test - with three different examples # ----------------------------------------------------------------------------------- # gROOT.Reset() # Setting of general plotting style: gStyle.SetCanvasColor(0) gStyle.SetFillColor(0) # Settings for statistics box: gStyle.SetOptStat(1111) gStyle.SetOptFit(1111) # Random numbers from the Mersenne-Twister: r = TRandom3() r.SetSeed(3) # Initializing the random numbers using 1 as seed. Put "0" to use the time (i.e. random)! # Text to write on the plot: text = TLatex() text.SetNDC() text.SetTextFont(1) text.SetTextColor(1) SavePlots = False Nexp = 1 # ------------------------------------------------------------------ # # Generate and fit LINEAR data: # ------------------------------------------------------------------ # NpointsLin = 17 # Create arrays for graph (needed by ROOT): xLin = array( 'f', [0.0]*NpointsLin ) # Create an array of length Npoints with zeros. exLin = array( 'f', [0.0]*NpointsLin ) # [X]*N is just a python syntax to initialize yLin = array( 'f', [0.0]*NpointsLin ) # a list of values X. eyLin = array( 'f', [0.0]*NpointsLin ) alpha0 = 3.6 alpha1 = 0.3 sigmay = 1.0 Hist_Chi2_Lin = TH1F("Hist_Chi2_Lin", "Hist_Chi2_Lin", 30, 0.0, 60.0) Hist_Prob_Lin = TH1F("Hist_Prob_Lin", "Hist_Prob_Lin", 20, 0.0, 1.0) # Loop over number of experiments to generate data and subsequent Chi2 values: for iexp in range( Nexp ) : # Generate points: for i in range( NpointsLin ) : xLin[i] = float( i+1 ) exLin[i] = 0.0 yLin[i] = alpha0 + alpha1 * xLin[i] + r.Gaus(0.0, sigmay) eyLin[i] = sigmay # Fit points: graphLin = TGraphErrors(NpointsLin, xLin, yLin, exLin, eyLin) fitLin = TF1("fitLin", "[0] + [1]*x", 0.5, float(NpointsLin)+0.5) fitLin.SetParameters(alpha0, alpha1) graphLin.Fit("fitLin","RQ") # The "Q" option means "Quiet", i.e. don't print or plot anything! Hist_Chi2_Lin.Fill(fitLin.GetChisquare()) Hist_Prob_Lin.Fill(fitLin.GetProb()) # ------------------------------------------------------------------ # # Generate and fit OSCILLATING data: # ------------------------------------------------------------------ # NpointsOsc = 19 # Create arrays for graph (needed by ROOT): xOsc = array( 'f', [0.0]*NpointsOsc ) # Create an array of length Npoints with zeros. exOsc = array( 'f', [0.0]*NpointsOsc ) # [X]*N is just a python syntax to initialize yOsc = array( 'f', [0.0]*NpointsOsc ) # a list of values X. eyOsc = array( 'f', [0.0]*NpointsOsc ) mean = 1.6 amp = 3.3 omega = 0.7 phase = 0.3 sigmay = 0.5 Hist_Chi2_Osc = TH1F("Hist_Chi2_Osc", "Hist_Chi2_Osc", 30, 0.0, 60.0) Hist_Prob_Osc = TH1F("Hist_Prob_Osc", "Hist_Prob_Osc", 20, 0.0, 1.0) # Loop over number of experiments to generate data and subsequent Chi2 values: for iexp in range( Nexp ) : # Generate points: for i in range( NpointsOsc ) : xOsc[i] = float( i+1 ) exOsc[i] = 0.0 yOsc[i] = mean + amp * cos(omega*xOsc[i] + phase) + r.Gaus(0.0, sigmay) eyOsc[i] = sigmay # Fit points: graphOsc = TGraphErrors(NpointsOsc, xOsc, yOsc, exOsc, eyOsc) fitOsc = TF1("fitOsc", "[0] + [1]*cos([2]*x+[3])", 0.5, float(NpointsOsc)+0.5) fitOsc.SetParameters(mean, amp, omega, phase) graphOsc.Fit("fitOsc","RQ") # The "Q" option means "Quiet", i.e. don't print or plot anything! Hist_Chi2_Osc.Fill(fitOsc.GetChisquare()) Hist_Prob_Osc.Fill(fitOsc.GetProb()) # ------------------------------------------------------------------ # # Generate and fit EXPONENTIAL binned data: # ------------------------------------------------------------------ # NpointsExp = 1000 NbinsExp = 17 Hist_Exp = TH1F("Hist_Exp", "Hist_Exp", NbinsExp, 0.0, 17.0) tau = 3.1 Hist_Chi2_Exp = TH1F("Hist_Chi2_Exp", "Hist_Chi2_Exp", 30, 0.0, 60.0) Hist_Prob_Exp = TH1F("Hist_Prob_Exp", "Hist_Prob_Exp", 20, 0.0, 1.0) # Loop over number of experiments to generate data and subsequent Chi2 values: for iexp in range( Nexp ) : # Generate points: Hist_Exp.Reset() for i in range( NpointsExp ) : Hist_Exp.Fill(r.Exp(tau)) # Fit points: fitExp = TF1("fitExp", "[0]*exp(-x/[1])", 0.0, float(NbinsExp)) fitExp.SetParameters(NpointsExp, tau) Hist_Exp.Fit("fitExp","RQN") # "R": fit in Range, "Q": Quiet (no printing!), "N": No plot. Hist_Chi2_Exp.Fill(fitExp.GetChisquare()) Hist_Prob_Exp.Fill(fitExp.GetProb()) # ------------------------------------------------------------------ # # Fit the data and plot the result: # ------------------------------------------------------------------ # # Make a white canvas and draw an example fit in: c0 = TCanvas("c0", "", 100, 20, 1200, 500) c0.Divide(3,1) c0.cd(1) graphLin.SetLineWidth(2) graphLin.SetMarkerStyle(20) graphLin.SetMarkerColor(2) graphLin.Draw("AP") c0.cd(2) graphOsc.SetLineWidth(2) graphOsc.SetMarkerStyle(20) graphOsc.SetMarkerColor(2) graphOsc.Draw("AP") c0.cd(3) Hist_Exp.SetLineWidth(2) fitExp2 = TF1("fitExp2", "[0]*exp(-x/[1])", 0.0, float(NbinsExp)) fitExp2.SetParameters(NpointsExp, tau) Hist_Exp.Fit("fitExp2","R") Hist_Exp.Draw("e") # "e" stands for "with errors" # Save to file: c0.Update() if (SavePlots) : c0.SaveAs("Chi2Dist_SeveralExamples.pdf") # If there have been more than one experiment, then make another white canvas: if (Nexp > 1) : c1 = TCanvas("c1", "", 150, 50, 1200, 500) c1.Divide(3,1) # Divide the canvas into 2-by-2 subcanvas c1.cd(1) # Go to the first subcanvas Hist_Chi2_Lin.Sumw2() # Ensure that ROOT calculates/propagate the uncertainties correctly. Hist_Chi2_Lin.DrawNormalized("e") # Here I draw it normalized, so that the function will match! f1a = TF1("f1a","2.0*ROOT::Math::chisquared_pdf(x,15)", 0, 60) # Chi-square function with Ndof=15 f1a.Draw("same") # Draw it on top of the histogram. c1.cd(2) # Go to the second subcanvas Hist_Chi2_Osc.Sumw2() # Ensure that ROOT calculates/propagate the uncertainties correctly. Hist_Chi2_Osc.DrawNormalized("e") # Here I draw it normalized, so that the function will match! f1b = TF1("f1b","2.0*ROOT::Math::chisquared_pdf(x,15)", 0, 60) # Chi-square function with Ndof=15 f1b.Draw("same") # Draw it on top of the histogram. c1.cd(3) # Go to the third subcanvas Hist_Chi2_Exp.Sumw2() # Ensure that ROOT calculates/propagate the uncertainties correctly. Hist_Chi2_Exp.DrawNormalized("e") # Here I draw it normalized, so that the function will match! f1c = TF1("f1c","2.0*ROOT::Math::chisquared_pdf(x,15)", 0, 60) # Chi-square function with Ndof=15 f1c.Draw("same") # Draw it on top of the histogram. # Save to file: c1.Update() if (SavePlots) : c1.SaveAs("Chi2Dist_SeveralCases.pdf") raw_input( ' ... Press enter to exit ... ' ) #---------------------------------------------------------------------------------- # # Make sure you've read the relevant references and that you understand not only what # the ChiSquare is, but also that it follows the ChiSquare distribution, and that the # probability of obtaining such a ChiSquare or worse can be calculated from it. # # The program generates a certain number of datasets in three different ways, each # with 15 degrees of freedom in the fit, from which the Chi2 of the fit is recorded. # # # Questions: # ---------- # 1) In the example of the linear fit, what number of points lies outside +-1 sigma? # Is that a reasonable number? # # 2) In the oscillatory case, try to drop the line with "SetParameters", and see how # well the fit goes, when it does not have good starting values. # # 3) In the exponential fit, where do the uncertainties come from? And is the fit # reasonable? # # # Advanced questions: # ------------------- # 1) Why does the last of the three Chi2 distributions not fit quite? # Try to change the number of generated points to 100 instead of 1000, # and/or change the lifetime to tau=2.1. Does this increase the mismatch # of the Chi2 distribution. Does that give you a hint why? # #----------------------------------------------------------------------------------