// ----------------------------------------------------------------------------------- // /* ROOT macro for fitting points with errors to a user defined function. This ROOT macro illustrates the use of ChiSquare as a goodness-of-fit measure, how this distribution comes about, and that it actually works! At the same time, it is also an illustration of how to do a linear fit analytically, that is without running the fitting algorithm (called Minuit). References: Barlow: Chapter 6 Cowan: Chapter 2.7, Chapter 7 Bevington: Chapter 6 Author: Troels C. Petersen (CERN) Email: petersen@nbi.dk Date: 12th of September 2011 */ // ----------------------------------------------------------------------------------- // // ----------------------------------------------------------------------------------- // double sqr(double a) { // ----------------------------------------------------------------------------------- // return a*a; } // ----------------------------------------------------------------------------------- // void ChiSquareTest() { // ----------------------------------------------------------------------------------- // gROOT->Reset(); // Setting of general plotting style: gStyle->SetCanvasColor(0); gStyle->SetFillColor(0); // Settings for statistics box: gStyle->SetOptStat(1111); gStyle->SetOptFit(1111); // Random numbers from the Mersenne-Twister: TRandom3 r; // r.SetSeed(0); // Initializing the random numbers! TLatex *text = new TLatex(); text->SetNDC(); text->SetTextFont(1); text->SetTextColor(1); // ------------------------------------------------------------------ // // Make Histograms: // ------------------------------------------------------------------ // TH1F* Hist_alpha0 = new TH1F("Hist_alpha0", "Hist_alpha0", 80, 0.0, 8.0); TH1F* Hist_alpha1 = new TH1F("Hist_alpha1", "Hist_alpha1", 80, -0.25, 1.35); TH1F* Hist_Chi2 = new TH1F("Hist_Chi2", "Hist_Chi2", 20, 0.0, 20.0); TH1F* Hist_Prob = new TH1F("Hist_Prob", "Hist_Prob", 20, 0.0, 1.0); TGraphErrors* graph; // ------------------------------------------------------------------ // // Loop over generating and fitting data: // ------------------------------------------------------------------ // const int Nexp = 1000; const int Npoints = 9; double x[Npoints]; double ex[Npoints]; double y[Npoints]; double ey[Npoints]; double alpha0 = 3.6; double alpha1 = 0.3; double sigmay = 1.0; for (int iexp=0; iexp < Nexp; iexp++) { // Generate data: // -------------- for (int i=0; i < Npoints; i++) { x[i] = double(i+1); ex[i] = 0.0; y[i] = alpha0 + alpha1 * x[i] + r.Gaus(0.0,sigmay); ey[i] = sigmay; } // Save first example: if (iexp == 0) graph = new TGraphErrors(Npoints, x, y, ex, ey); // Fit data analytically: // ---------------------- double s = 0.0; double sx = 0.0; double sxx = 0.0; double sy = 0.0; double sxy = 0.0; for (int i=0; i < Npoints; i++) { s += 1.0; sx += x[i]; sxx += sqr(x[i]); sy += y[i]; sxy += x[i] * y[i]; } double Delta = sxx * s - sqr(sx); double alpha0_calc = (sy * sxx - sxy * sx) / Delta; double alpha1_calc = (sxy * s - sy * sx) / Delta; double sigma_alpha0_calc = sigmay * sqrt(sxx / Delta); double sigma_alpha1_calc = sigmay * sqrt(s / Delta); // Fit data numerically: // --------------------- TGraphErrors* graph = new TGraphErrors(Npoints,x,y,ex,ey); TF1 *fit = new TF1("fit", "[0] + [1]*x", 0.5, 9.5); graph->Fit("fit","rq"); double alpha0_fit = fit->GetParameter(0); double alpha1_fit = fit->GetParameter(1); double sigma_alpha0_fit = fit->GetParError(0); double sigma_alpha1_fit = fit->GetParError(1); double Chi2 = fit->GetChisquare(); double Ndof = fit->GetNDF(); double Prob = fit->GetProb(); if (iexp < 25) printf(" Calc:%6.3f+-%5.3f %5.3f+-%5.3f Fit:%6.3f+-%5.3f %5.3f+-%5.3f Prob=%6.4f \n", alpha0_calc, sigma_alpha0_calc, alpha1_calc, sigma_alpha1_calc, alpha0_fit, sigma_alpha0_fit, alpha1_fit, sigma_alpha1_fit, Prob); // Fill histograms: Hist_alpha0->Fill(alpha0_calc); Hist_alpha1->Fill(alpha1_calc); Hist_Chi2->Fill(Chi2); Hist_Prob->Fill(Prob); } // ------------------------------------------------------------------ // // Fit the data and plot the result: // ------------------------------------------------------------------ // // Make a white canvas and draw the example fit in: c0 = new TCanvas("c0","",100,20,600,450); c0->SetFillColor(0); graph->SetLineWidth(2); graph->SetMarkerStyle(20); graph->SetMarkerColor(2); graph->Draw("AP"); // Make another white canvas: if (Nexp > 1) { c1 = new TCanvas("c1","",750,260,600,450); c1->SetFillColor(0); c1->Divide(2,2); c1->cd(1); Hist_alpha0->Draw(); c1->cd(2); Hist_alpha1->Draw(); c1->cd(3); Hist_Chi2->Sumw2(); Hist_Chi2->DrawNormalized("e"); TF1 *f1a = new TF1("f1a","ROOT::Math::chisquared_pdf(x,7)", 0, 20); f1a->Draw("same"); // Text: text->SetTextSize(0.06); text->DrawLatex(0.20, 0.16, "NOTE: This is not a fit!"); c1->cd(4); Hist_Prob->SetMinimum(0.0); Hist_Prob->Draw("e"); // Save to file: c1->Update(); // c1->SaveAs("FitChi2dist.eps"); } } //---------------------------------------------------------------------------------- /* Make sure you've read the relevant references and that you understand not only what the ChiSquare is, but also that it follows the ChiSquare distribution, and that the probability of obtaining such a ChiSquare or worse can be calculated from it. The program generates a certain number of datasets, each consisting of 9 points along a line. These are then fitted (both analytically and numerically), and the result and the Chi2 of the fit is recorded along with the probability of the fit. The Questions: ---------- 1) Run the code such that it does exactly one fit, and take a look at the line fitted. Does this look reasonable, and what is the chance that the input for the data could actually be from a flat distribution? 2) Does the fit reproduce the input numbers well (include the uncertainties in your answer)? And do the analytical results agree with the numerical ones? 3) Now increase the number of experiments to e.g. 1000, and rerun the macro. Figure out what you see in the window split in 2-by-2, and go through each of these to see, if you understand every feature of the distributions shown, and if you are happy with them! This somehow makes this the "long" question without any leading questions, but you should by now be statistically minded enough to know what to look for, at least to some degree :-) 4) Investigate if the distributions of probabilities is flat, or if it has some slope to it. Do you understand why the distributions of probabilities is flat? 5) Find the line where the random points for the line are generated, and add a quadratic term in x with the coefficient 0.1. Run the program again with Nexp=1, and start by looking at the single fit in the graph. Can you see this change? Now run 1000 experiments again, and see what has changed in the distributions in the 2-by-2 window when including such a term, and think what consequences it might have for an experiment. Advanced questions: ------------------- 1) Change the coefficient from question 5) to -0.2. Of course the linear fit does not do very well. What changes are needed for the fit to be good again? Make these changes, and see that the condition in question 4) is again met. 2) On page 104 Glen Cowan lists the conditions under which the ChiSquare distribution is obtained (hypothesis is linear in fit parameters). Try to make a test, where this is not the case (e.g. f(x) = cos(a*x)), and see to what degree this requirement is actually needed. */ //----------------------------------------------------------------------------------