#!/usr/bin/env python # ----------------------------------------------------------------------------------- # # # Python macro for illustrating the Binomial, Poisson and Gaussian distributions, # and how they transform into each other (well, into the Gaussian). # # Reference: # Barlow, chapter 3 # # Run this Python macro by: # > ./BinomialPoissonGaussian.py # # Author: Troels C. Petersen (NBI) # Email: petersen@nbi.dk # Date: 19th of November 2017 # # ----------------------------------------------------------------------------------- # from __future__ import division, print_function import numpy as np import matplotlib.pyplot as plt from iminuit import Minuit from probfit import Chi2Regression from scipy.stats import binom, poisson, norm from scipy import stats plt.close('all') # Set parameters: r = np.random # Random generator r.seed(42) # Fixed order of random numbers SavePlots = False verbose = True Nverbose = 10 Nexperiments = 1000 #---------------------------------------------------------------------------------- # Initial figures: #---------------------------------------------------------------------------------- def func_Binomial_pmf(x, n, p): return binom.pmf(x, n, p) def func_Gaussian_pdf(x, mu, sigma) : return norm.pdf(x, mu, sigma) n, p = 10, 0.2 xaxis = np.linspace(-0.5, 10.5, 1000) yaxis_binom = func_Binomial_pmf(np.floor(xaxis+0.5), n, p) # note the floor command here yaxis_gauss = func_Gaussian_pdf(xaxis, n*p, np.sqrt(n*p*(1-p))) fig0, ax0 = plt.subplots(figsize=(10, 6)) ax0.plot(xaxis, yaxis_binom, '-', label='Binomial with n={:2d} and p={:.3f}'.format(n, p)) ax0.plot(xaxis, yaxis_gauss, '-', label='Gaussian with mu={:.2f} and sigma={:.3f}'.format(n*p, np.sqrt(n*p*(1-p)))) ax0.set_xlim(-0.5, 10.5) ax0.set_title('Probability for Binomial and Gaussian') ax0.set_xlabel('x') ax0.set_ylabel('Probability') plt.legend(loc='upper right') #---------------------------------------------------------------------------------- # Loop over process: #---------------------------------------------------------------------------------- # Define process, i.e. number of trials and probability of success: Ntrials = 2000 pSuccess = 1.0/60.0 Lambda = Ntrials * pSuccess # This is the mean and also how the Poisson parameter is defined! all_nSuccess = np.zeros(Nexperiments) # Run the experiments, and fill the histogram from above: for iexp in range( Nexperiments ) : # Simulating process defined: nSuccess = 0 for i in range( Ntrials ) : x = r.uniform() if (x < pSuccess): nSuccess = nSuccess + 1 # Record result: if (verbose and iexp < Nverbose): print(" nSuccess: {:4d}".format(nSuccess)) # Save Result all_nSuccess[iexp] = nSuccess #---------------------------------------------------------------------------------- # Plot result: #---------------------------------------------------------------------------------- # Define a histogram with the "data" (Note: think about the binning!): counts, bin_edges = np.histogram(all_nSuccess, bins=Ntrials+1, range=(-0.5, Ntrials+0.5)) bin_centers = (bin_edges[1:] + bin_edges[:-1])/2 s_counts = np.sqrt(counts) x = bin_centers[counts>0] y = counts[counts>0] sy = s_counts[counts>0] fig, ax = plt.subplots() ax.errorbar(x, y, yerr=sy, xerr=0.5, label='Distribution of nSucces', fmt='.k', ecolor='k', elinewidth=1, capsize=1, capthick=1) ax.set_xlim(-0.5, 60) ax.set_xlabel('Number of successes') ax.set_ylabel('Frequency') # Draw Binomial: # -------------- def func_Binomial(x, N, n, p): return N * binom.pmf(x, n, p) Chi2_Bin = Chi2Regression(func_Binomial, x, y, sy) minuit_Bin = Minuit(Chi2_Bin, pedantic=False, N=Nexperiments, n=Ntrials, p=pSuccess) # minuit_Bin.migrad() # perform the actual fit xaxis = np.linspace(-0.5, 60, 1000) yaxis = func_Binomial(np.floor(xaxis+0.5), *minuit_Bin.args) ax.plot(xaxis, yaxis, '-', label='Binomial distribution') # Draw Poisson: # ------------- def func_Poisson(x, N, mu) : return N * poisson.pmf(x, mu) Chi2_Poisson = Chi2Regression(func_Poisson, x, y, sy) minuit_Poisson = Minuit(Chi2_Poisson, pedantic=False, N=Nexperiments, mu=Lambda) # minuit_Poisson.migrad() # perform the actual fit # Note that the np.floor function takes the integer/rounded (towards zero) value of numbers. yaxis = func_Poisson(np.floor(xaxis+0.5), *minuit_Poisson.args) ax.plot(xaxis, yaxis, '-', label='Poisson distribution') # Draw Gaussian: # ------------- def func_Gaussian(x, N, mu, sigma) : return N * norm.pdf(x, mu, sigma) Chi2_Gaussian = Chi2Regression(func_Gaussian, x, y, sy) minuit_Gaussian = Minuit(Chi2_Gaussian, pedantic=False, N=Nexperiments, mu=Lambda, sigma=np.sqrt(Lambda)) # minuit_Gaussian.migrad() # perform the actual fit yaxis = func_Gaussian(xaxis, *minuit_Gaussian.args) ax.plot(xaxis, yaxis, '-', label='Gaussian distribution') plt.legend() plt.tight_layout() plt.show(block=False) if (SavePlots): fig.savefig("BinomialPoissonGaussian.pdf") #---------------------------------------------------------------------------------- # Your calculation of ChiSquare values! # I suggest you use Pearson's Chi2, and require Nobs and/or Nexp > e.g. 0.1 # Remember with your choice to ensure, that there is no division by zero. #---------------------------------------------------------------------------------- Nbins = len(x) # Just to know how many bins to loop over chi2_bino = 0.0 Ndof = 0 for ibin in range (Nbins) : Nobs = y[ibin] if (Nobs > 0) : Nexp_bino = func_Binomial(x[ibin], *minuit_Bin.args) print(" Binomial: chi2 = {:5.2f} Ndof = {:2d} Prob = {:6.4f} (just a test printout!!!) ".format(0.0, 0, 0.0)) # Finally, ensure that the program does not termine (and the plot disappears), before you press enter: try: __IPYTHON__ except: raw_input('Press Enter to exit') #---------------------------------------------------------------------------------- # # Make sure you understand what process yields a Binomial, a Poisson and a Gaussian # distribution. # # The program repeats an experiment, where one performs N trials, each with p chance # of success, and then considers the distribution of successes. # # # Questions: # ---------- # 1) Plot a Binomial (N=27, p=1/3), Poisson (lambda = 9), and Gaussian (mu=9, sigma=3). # Which distribution has the longest tail towards high values? And which one has # the longest tail the other way? Possibly use a logarithmic plot. # # 2) Using Nexperiments=1000, Ntrials=10 and pSuccess=1/6, calculate "by hand" (i.e. # using a loop) the ChiSquare between the data and each of the three distributions. # Since you are not fitting anything, what is the number of degrees of freedom? # Do they give a reasonable Chi2 probability? # Retry with Ntrials=100 and pSuccess=1/60. Does the degree of fit improve for any # of the three? # Retry again with Ntrials=1000 and pSuccess=1/60. Again, does the quality of the fit # improve further for any of the three? # # 3) In the above case with Ntriels=100 and pSuccess=1/60, calculate the mean and the # width of the distribution. Do they have the relation, that you would expect from # a Poisson distribution? # Try it for other values where the Poisson describes the data well, and check if # the Poisson relation between mean and width holds. # # 4) Using Nexperiments=1000, Ntrials=1000 and pSuccess=1/60, is the skewness consistent # with zero (as the Gaussian should have)? Does that contradict the conclusion from # above? # #----------------------------------------------------------------------------------