// ----------------------------------------------------------------------------------- // /* ROOT macro for illustrating a likelihood fit. The example uses as default 100 exponentially distributed random times, with the goal of finding the best estimate of the lifetime, tau = 1. Four estimates are considered: - Mean - Chi-square fit (chi2) - Binned Likelihood fit (bllh) - Unbinned Likelihood fit (llh) While the mean can be simply calculated, the three other methods are based on a scan of values for tau in the range [0.5, 2.0]. For each value of tau, the chi2, bllh, and llh are calculated (in the two likelihood cases, it is actually -2*log(likelihood) which is calculated). This is not really an exercise, but more an attempt to illustrate four things: - How to make a (binned and unbinned) Likelihood function (and fit). - The difference and a comparison between a Chi-square and a (binned) Likelihood. - The difference and a comparison between a binned and unbinned Likelihood. - What goes on behind the scenes in ROOT, when it is asked to fit something. Author: Troels C. Petersen (NBI) Email: petersen@nbi.dk Date: 16th of September 2012 */ // ----------------------------------------------------------------------------------- // // Include from C++: #include #include #include #include // Include from ROOT: #include #include #include #include #include #include #include #include //---------------------------------------------------------------------------------- double sqr(double a) { return a*a; } // ----------------------------------------------------------------------------------- // // Parabola: double func_para(double *x, double *par) { // Parameters are as follows: // par[0]: Minimum value (i.e. constant) // par[1]: Minimum position (i.e. x of minimum) // par[2]: Quadratic term. // ----------------------------------------------------------------------------------- // return par[0] + par[2]*sqr(x[0]-par[1]); } // ----------------------------------------------------------------------------------- // // Double parabola with different slopes on each side of the minimum: double func_asympara(double *x, double *par) { // Parameters are as follows: // par[0]: Minimum value (i.e. constant) // par[1]: Minimum position (i.e. x of minimum) // par[2]: Quadratic term on lower side // par[3]: Quadratic term on higher side // ----------------------------------------------------------------------------------- // if (x[0] < par[1]) return par[0] + par[2]*sqr(x[0]-par[1]); else return par[0] + par[3]*sqr(x[0]-par[1]); } // ----------------------------------------------------------------------------------- // void LikelihoodFit() { // ----------------------------------------------------------------------------------- // // Set the showing of statistics and fitting results (0 means off, 1111 means all on): gROOT->Reset(); gStyle->SetOptStat(1111); gStyle->SetOptFit(1111); // Set the graphics: gStyle->SetStatBorderSize(1); gStyle->SetCanvasColor(0); // Parameters of the problem: const int Ntimes = 100; // Number of times double tau_truth = 1.0; // We choose (like Gods!) the lifetime. vector t; // Vector of times bool verbose = false; // Should the program print (a lot) or not? (true/false) TRandom3 r; int Nbin_exp = 50; // Number of bins in histogram double tmax = 10.0; // Maximum time in histogram double dt = tmax / double(Nbin_exp); TH1F *Hist_Exp = new TH1F("Hist_Exp", "Hist_Exp", Nbin_exp, 0.0, tmax); // ------------------------------------ // Generate data: // ------------------------------------ for (int i=0; i < Ntimes; i++) { t.push_back(-tau_truth*log(r.Uniform())); // Exponential with lifetime tau. // t.push_back(r.Exp(tau_truth)); // Could also use this line! Hist_Exp->Fill(t[i]); if (verbose) printf(" %5.2f", t[i]); } if (verbose) printf("\n\n"); // ------------------------------------ // Analyse data: // ------------------------------------ // Define the number of tau values and their range to test in Chi2 and LLH: // (As we know the "truth", namely tau = 1, the range [0.5, 2.0] seems fitting // for the mean. The number of bins can be increased at will...) const int Ntau = 150; double min_tau = 0.5; double max_tau = 2.0; double delta_tau = (max_tau-min_tau) / Ntau; // Loop over hypothesis for the value of tau and calculate Chi2 and LLH: // --------------------------------------------------------------------- double tau[Ntau+1], chi2[Ntau+1], bllh[Ntau+1], llh[Ntau+1]; double chi2_minval = 999999.0; double chi2_minpos = 0.0; double bllh_minval = 999999.0; double bllh_minpos = 0.0; double llh_minval = 999999.0; double llh_minpos = 0.0; // Now loop of POSSIBLE tau estimates: for (int itau=0; itau < Ntau+1; itau++) { double tau_hypo = min_tau + double(itau)*delta_tau; // Scan in values of tau tau[itau] = tau_hypo; // Chi2 and binned likelihood (from loop over bins in histogram): chi2[itau] = 0.0; bllh[itau] = 0.0; for (int ibin=1; ibin < Nbin_exp; ibin++) { // Note: The number of EXPECTED events is the intergral over the bin! double xlow_bin = Hist_Exp->GetBinCenter(ibin) - dt/2.0; double xhigh_bin = Hist_Exp->GetBinCenter(ibin) + dt/2.0; double nexp = double(Ntimes) * (exp(-xlow_bin/tau_hypo) - exp(-xhigh_bin/tau_hypo)); double nobs = Hist_Exp->GetBinContent(ibin); chi2[itau] += sqr(nobs-nexp) / nexp; // Chi2 summation bllh[itau] += -2.0*log(TMath::Poisson(int(nobs), nexp)); // Binned LLH summation } // Unbinned likelihood (from loop over events): llh[itau] = 0.0; for (int iev=0; iev < Ntimes; iev++) { llh[itau] += -2.0*log(1.0/tau_hypo*exp(-t[iev]/tau_hypo)); } if (verbose) printf(" %3d. tau = %4.2f chi2 = %6.2f log(bllh) = %6.2f log(llh) = %6.2f \n", itau, tau_hypo, chi2[itau], bllh[itau], llh[itau]); // Search for minimum values of chi2, bllh, and llh: if (chi2[itau] < chi2_minval) {chi2_minval = chi2[itau]; chi2_minpos = tau_hypo;} if (bllh[itau] < bllh_minval) {bllh_minval = bllh[itau]; bllh_minpos = tau_hypo;} if (llh[itau] < llh_minval) {llh_minval = llh[itau]; llh_minpos = tau_hypo;} } printf(" Minimum found: chi2 = %7.4f bllh = %7.4f llh = %7.4f \n", chi2_minpos, bllh_minpos, llh_minpos); // ------------------------------------ // Plot and fit results: // ------------------------------------ // ChiSquare: // ---------- TCanvas* c_chi2 = new TCanvas("c_chi2","",80,40,600,450); TGraphErrors* graph_chi2 = new TGraphErrors(Ntau+1, tau, chi2); TF1 *fit_chi2 = new TF1("fit_chi2", func_asympara, chi2_minpos - 0.20, chi2_minpos + 0.30, 4); fit_chi2->SetParameters(chi2_minval, chi2_minpos, 20.0, 20.0); fit_chi2->SetLineColor(4); graph_chi2->SetMarkerStyle(20); graph_chi2->SetMarkerSize(0.4); graph_chi2->Fit("fit_chi2","R"); graph_chi2->Draw("AP"); // Draw lines corresponding to minimum Chi2 and minimum Chi2+1, and draw errors: double minval = fit_chi2->GetParameter(0); double minpos = fit_chi2->GetParameter(1); double err_lower = 1.0 / sqrt(fit_chi2->GetParameter(2)); double err_upper = 1.0 / sqrt(fit_chi2->GetParameter(3)); TLine* line_min = new TLine(chi2_minpos - 0.30, minval, chi2_minpos + 0.30, minval); line_min->Draw(); TLine* line_min1 = new TLine(chi2_minpos - 0.30, minval+1.0, chi2_minpos + 0.30, minval+1.0); line_min1->Draw(); TLine* line_low = new TLine(minpos-err_lower, minval+1.0, minpos-err_lower, minval-2.0); line_low->Draw(); TLine* line_upp = new TLine(minpos+err_upper, minval+1.0, minpos+err_upper, minval-2.0); line_upp->Draw(); printf(" Chi2 fit gives: tau = %6.4f + %6.4f - %6.4f \n", minpos, err_lower, err_upper); // Go through tau values to find minimum and +-1 sigma: // This assumes knowing the minimum value, and Chi2s above Chi2_min+1 if (((chi2[0] - chi2_minval) > 1.0) && ((chi2[Ntau] - chi2_minval) > 1.0)) { bool found_lower = false; bool found_upper = false; double tau_lower, tau_upper; for (int itau=0; itau < Ntau+1; itau++) { if ((!found_lower) && ((chi2[itau] - chi2_minval) < 1.0)) { tau_lower = tau[itau]; found_lower = true; } if ((found_lower) && (!found_upper) && ((chi2[itau] - chi2_minval) > 1.0)) { tau_upper = tau[itau]; found_upper = true; } } printf(" Chi2 scan gives: tau = %6.4f + %6.4f - %6.4f \n", chi2_minpos, chi2_minpos-tau_lower, tau_upper-chi2_minpos); } else { printf("Error: Chi2 values do not fulfill requirements for finding minimum and errors! \n"); } c_chi2->Update(); // c_chi2->SaveAs("FitMinimum_chi2.png"); // Binned Likelihood: // ------------------ TCanvas* c_bllh = new TCanvas("c_bllh","",110,60,600,450); TGraphErrors* graph_bllh = new TGraphErrors(Ntau+1, tau, bllh); TF1 *fit_bllh = new TF1("fit_bllh", func_asympara, bllh_minpos - 0.20, bllh_minpos + 0.20, 4); fit_bllh->SetParameters(bllh_minval, bllh_minpos, 20.0, 20.0); fit_bllh->SetLineColor(4); graph_bllh->SetMarkerStyle(20); graph_bllh->SetMarkerSize(0.4); graph_bllh->Fit("fit_bllh","R"); graph_bllh->Draw("AP"); c_bllh->Update(); // c_bllh->SaveAs("FitMinimum_bllh.png"); // Unbinned Likelihood: // -------------------- TCanvas* c_llh = new TCanvas("c_llh","",140,80,600,450); TGraphErrors* graph_llh = new TGraphErrors(Ntau+1, tau, llh); TF1 *fit_llh = new TF1("fit_llh", func_asympara, llh_minpos - 0.20, llh_minpos + 0.20, 4); fit_llh->SetParameters(llh_minval, llh_minpos, 20.0, 20.0); fit_llh->SetLineColor(4); graph_llh->SetMarkerStyle(20); graph_llh->SetMarkerSize(0.4); graph_llh->Fit("fit_llh","R"); graph_llh->Draw("AP"); c_llh->Update(); // c_llh->SaveAs("FitMinimum_llh.png"); // Fit the data using ROOT (both chi2 and binned likelihood fits): c1 = new TCanvas("c1","",180,110,600,450); Hist_Exp->SetXTitle("Lifetime [s]"); Hist_Exp->SetYTitle("Frequency [ev/0.1s]"); TF1 *rootfit_chi2 = new TF1("rootfit_chi2", "100.0 * 0.20 / [0] * exp(-x/[0])", 0.0, 10.0); rootfit_chi2->SetParameter(0, 1.0); rootfit_chi2->SetLineColor(4); Hist_Exp->Fit("rootfit_chi2","RE"); TF1 *rootfit_bllh = new TF1("rootfit_bllh", "100.0 * 0.20 / [0] * exp(-x/[0])", 0.0, 10.0); rootfit_bllh->SetParameter(0, 1.0); rootfit_bllh->SetLineColor(2); Hist_Exp->Fit("rootfit_bllh","RLE+"); c1->Update(); // c1->SaveAs("GraphFitted.png"); } // ----------------------------------------------------------------------------------- // /* Make sure that you understand how the likelihood is different from the ChiSquare, and how the binned likelihood is different from the unbinned. If you don't do it, this exercise, much of the course and statistics in general will be lost on you! :-) The binned likelihood resembels the ChiSquare, only the evaluation in each bin is a bit different, especially if the number of events in the bin is low, as the PDF considered (Poisson for the LLH, Gaussian for the ChiSquare) is then very different. Questions: ---------- 1) Try to increase the number of exponential numbers you consider to say 10000, and see if the errors become more symetric. Perhaps you will have to also increase the number of points you test (or decrease the range you search!). 2) Try to omit the bins with 0 events in them, when you calculate the Chi2 above. How much does that change? 3) Make a copy of the program and put in a different PDF (i.e. not the exponential). Run it, and see if the errors are still asymetric. For the function, try either e.g. a Uniform or a Gaussian. NOTE: If you want to try other more complex functions, then the method "TH1::FillRandom" might come in handy! Look it up! We'll get to this. */ //----------------------------------------------------------------------------------