#!/usr/bin/env python # ----------------------------------------------------------------------------------- # # # Python/ROOT macro for illustrating the Binomial, Poisson and Gaussian distributions, # and how they transform into each other (well, into the Gaussian). # # Reference: # Barlow, chapter 3 # # Run this pyROOT macro by: # > ./BinomialPoissonGaussian.py # # Author: Troels C. Petersen (NBI) # Email: petersen@nbi.dk # Date: 19th of November 2016 # # ----------------------------------------------------------------------------------- # from ROOT import * import math r = TRandom3() # Random generator r.SetSeed(4) # Fixed order of random numbers SavePlots = False verbose = True Nverbose = 10 Nexperiments = 1000 def sqr(a) : return a*a #---------------------------------------------------------------------------------- # Loop over process: #---------------------------------------------------------------------------------- # Define process, i.e. number of trials and probability of success: Ntrials = 10 pSuccess = 1.0/6.0 Lambda = Ntrials * pSuccess # This is the mean and also how the Poisson parameter is defined! # Define a histogram with the "data" (note and think about the binning!): Hist_nSuccess = TH1F("Hist_nSuccess", ";Number of successes;Frequency", Ntrials+1, -0.5, Ntrials+0.5) # Run the experiments, and fill the histogram from above: for iexp in range( Nexperiments ) : # Simulating process defined: nSuccess = 0 for i in range( Ntrials ) : x = r.Uniform() if (x < pSuccess) : nSuccess = nSuccess + 1 # Record result: if (verbose and iexp < Nverbose) : print " nSuccess: %4d"%(nSuccess) Hist_nSuccess.Fill(nSuccess) #---------------------------------------------------------------------------------- # Plot result: #---------------------------------------------------------------------------------- canvas = TCanvas( "canvas", "", 50, 50, 1200, 600 ) # canvas.SetLogy() # Set the y-axis to be logarithmic # Draw the histogram: Hist_nSuccess.GetXaxis().SetRangeUser(-0.5, 50.5) Hist_nSuccess.SetLineWidth(3) Hist_nSuccess.SetLineColor(kBlack) Hist_nSuccess.Draw("e") # Draw Binomial: # -------------- def func_Binomial(x, p) : if (x[0] > -0.5) : return p[0] * TMath.Binomial(int(p[1]), int(x[0]+0.5)) * TMath.Power(p[2],int(x[0]+0.5)) * TMath.Power(1-p[2], int(p[1])-int(x[0]+0.5)) else : return 0.0 fBinomial = TF1("fBinomial", func_Binomial, -0.5, Ntrials+0.5, 3) fBinomial.SetParameters(Nexperiments, Ntrials, pSuccess) fBinomial.SetNpx(1000) # Set the number of intervals used when drawing function! fBinomial.SetLineColor(kBlue) fBinomial.Draw("same") # Draw Poisson: # ------------- def func_Poisson(x, p) : if (x[0] > -0.5) : return p[0] * TMath.PoissonI(x[0]+0.5, p[1]) else : return 0.0 fPoisson = TF1("fPoisson", func_Poisson, -0.5, Ntrials+0.5, 2) fPoisson.SetParameters(Nexperiments, Lambda) fPoisson.SetNpx(1000) # Set the number of intervals used when drawing function! fPoisson.SetLineColor(kMagenta) fPoisson.Draw("same") # Draw Gaussian: # ------------- def func_Gaussian(x, p) : z = ((x[0]) - p[1]) / p[2] return p[0] / sqrt(2.0*TMath.Pi()) / p[2] * exp(-0.5 *z*z) fGaussian = TF1("fGaussian", func_Gaussian, -0.5, Ntrials+0.5, 3) fGaussian.SetParameters(Nexperiments, Lambda, sqrt(Lambda)) fGaussian.SetNpx(1000) # Set the number of intervals used when drawing function! fGaussian.SetLineColor(kRed) fGaussian.Draw("same") # Legend: leg = TLegend( 0.60, 0.55, 0.89, 0.75 ) leg.SetFillColor(kWhite) leg.SetLineColor(kWhite) leg.AddEntry(Hist_nSuccess, " Distribution of nSuccess", "L") leg.AddEntry(fBinomial, " Binomial distribution", "L") leg.AddEntry(fPoisson, " Poisson distribution", "L") leg.AddEntry(fGaussian, " Gaussian distribution", "L") leg.Draw() canvas.Update() if (SavePlots) : canvas.SaveAs("BinomialPoissonGaussian.pdf") #---------------------------------------------------------------------------------- # Your calculation of ChiSquare values! # I suggest you use Pearson's Chi2, and require Nobs and/or Nexp > e.g. 0.1 # Remember with your choice to ensure, that there is no division by zero. #---------------------------------------------------------------------------------- Nbins = Hist_nSuccess.GetNbinsX() # Just to know how many bins to loop over chi2_bino = 0.0 Ndof = 0 for ibin in range (Nbins) : Nobs = Hist_nSuccess.GetBinContent(ibin+1) # Note: First bin in histogram is entry 1 not 0 if (Nobs > 0) : Nexp_bino = fBinomial.Eval(ibin) print " Binomial: chi2 = %5.2f Ndof = %2d Prob = %6.4f (just a test printout!!!) "%(0.0, 0, 0.0) raw_input('Press Enter to exit') #---------------------------------------------------------------------------------- # # Make sure you understand what process yields a Binomial, a Poisson and a Gaussian # distribution. # # The program repeats an experiment, where one performs N trials, each with p chance # of success, and then considers the distribution of successes. # # # Questions: # ---------- # 1) Which distribution has the longest tail towards high values? And which one has # the longest tail the other way? Possibly use a logarithmic plot. # # 2) Using Nexperiments=1000, Ntrials=10 and pSuccess=1/6, calculate "by hand" (i.e. # using a loop) the ChiSquare between the data and each of the three distributions. # Since you are not fitting anything, what is the number of degrees of freedom? # Do they give a reasonable Chi2 probability? # Retry with Ntrials=100 and pSuccess=1/60. Does the degree of fit improve for any # of the three? # Retry again with Ntrials=1000 and pSuccess=1/60. Again, does the degree of fit # improve further for any of the three? # # 3) In the above case with Ntriels=100 and pSuccess=1/60, calculate the mean and the # width of the distribution. Do they have the relation, that you would expect from # a Poisson distribution? # Try it for other values where the Poisson describes the data well, and check if # the Poisson relation between mean and width holds. # # 4) Using Nexperiments=1000, Ntrials=1000 and pSuccess=1/60, is the skewness consistent # with zero (as the Gaussian should have)? Does that contradict the conclusion from # above? # #----------------------------------------------------------------------------------