#!/usr/bin/env python
# -----------------------------------------------------------------------------------
#
# Python macro for illustration of the Central Limit Theorem (CLT).
#
# Random numbers from different distributions (but with RMS = 1) are added to a sum,
# and many such sums are plotted. As dictated by the CLT, their distribution turns
# out to be Gaussian.
# The example also illustrates how widths (and therefore uncertainties) are added in
# quadrature, as one has to divide the sum by the square root of the number of random
# numbers that went into the sum in order to get a Gaussian of unit width (when using
# random numbers of unit width, i.e. RMS = 1).
#
# For more information on the Central Limit Theorem, see:
#   R. Barlow: page 49
#   G. Cowan: page 33
#   http://en.wikipedia.org/wiki/Central_limit_theorem
#
# Run this macro by:
#   From prompt: > ./CentralLimit.py
#
# Author: Troels C. Petersen (NBI)
# Email:  petersen@nbi.dk
# Date:   11th of November 2017
#
# -----------------------------------------------------------------------------------

from __future__ import division, print_function
import numpy as np
import matplotlib.pyplot as plt
from iminuit import Minuit
from probfit import BinnedLH      # , UnbinnedLH, BinnedChi2, Chi2Regression, Extended

plt.close('all')

r = np.random          # Random generator
r.seed(42)             # Set a random seed (but a fixed one - more on that later.)

Nexperiments = 1000    # Number of sums produced
Nuniform     = 10      # Number of uniform numbers used in sum
Nexponential = 10      # Number of exponential numbers used in sum
Ncauchy      = 10      # Number of Cauchy numbers used in sum

pi = 3.14159265358979323846264338328   # Self-explanatory!!!
pi = np.pi                             # Another way of doing it - probably better!

verbose = True         # Print some numbers or not?
Nverbose = 10          # If so, how many?
SavePlots = True       # Save the plots produced to file(s)?


# =============================================================================
#  Initial functions
# =============================================================================

# Function to create a nice string output:
def nice_string_output(names, values, extra_spacing = 2):
    max_values = len(max(values, key=len))
    max_names = len(max(names, key=len))
    string = ""
    for name, value in zip(names, values):
        string += "{0:s} {1:>{spacing}} \n".format(name, value,
                  spacing = extra_spacing + max_values + max_names - len(name))
    return string[:-2]


#----------------------------------------------------------------------------------
# Loop over process:
#----------------------------------------------------------------------------------

N3sigma = 0            # Counter for the number of produced sums, that fall outside +-3 sigma

x_Uniform = []
x_Exponential = []
x_Cauchy = []
x_Sum = []

for iexp in range( Nexperiments ) :

    if (iexp % 100 == 0) :
        print("At iexp : ", iexp)                   # Show progress!

    sum_value = 0.0    # sum_value is the number we are going to add random numbers to!
                       # According to the CLT, it should be Gaussianly distributed.

    # Generating uniform numbers (with mean 0, and RMS of 1):
    for i in range( Nuniform ) :
        x = np.sqrt(12.0) * (r.uniform() - 0.5)     # Uniform between +-sqrt(3). Why?
        sum_value += x
        x_Uniform.append(x)
        if (verbose and iexp == 0 and i < Nverbose) :
            print("  Uniform:      {0:7.4f}".format(x))

    # Generating exponential numbers (with mean 0, and RMS of 1):
    for i in range( Nexponential ) :
        x = -np.log( r.uniform() ) - 1.0            # Exponential starting at -1. Why?
        # x = r.exponential() - 1.0                 # Alternative way to produce x exponentially.
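        # If u is uniform in (0,1), then -log(u) is exponentially distributed with
        # mean 1 (inverse-transform sampling), which is also what r.exponential()
        # draws from directly - hence the two lines above are equivalent.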
        sum_value += x
        x_Exponential.append(x)
        if (verbose and iexp == 0 and i < Nverbose) :
            print("  Exponential:  {0:7.4f}".format(x))

    # Generating numbers according to a Cauchy distribution (1 / (1 + x^2)):
    for i in range( Ncauchy ) :
        x = np.tan(pi * (r.uniform() - 0.5))        # Cauchy with mean 0
        # x = r.standard_cauchy()                   # Alternative way to produce x according to Cauchy PDF.
        sum_value += x
        x_Cauchy.append(x)
        if (verbose and iexp == 0 and i < Nverbose) :
            print("  Cauchy:       {0:7.4f}".format(x))

    Ntotal = Nuniform + Nexponential + Ncauchy
    sum_value = sum_value / np.sqrt( Ntotal )       # Ask yourself, why I divide by sqrt(N)?

    x_Sum.append(sum_value)
    if not (-3.0 < sum_value < 3.0) :
        N3sigma += 1

x_Uniform = np.array(x_Uniform)
x_Exponential = np.array(x_Exponential)
x_Cauchy = np.array(x_Cauchy)
x_Sum = np.array(x_Sum)


#----------------------------------------------------------------------------------
# Draw the input distributions:
#----------------------------------------------------------------------------------

Nbins = 240
x_ranges = [(-2.5, 2.5), (-1.5, 5.5), (-6, 6)]

fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(12, 6))
x_all = [x_Uniform, x_Exponential, x_Cauchy]
titles = ['Uniform', 'Exponential', 'Cauchy']

for ax_i, x, title, x_range in zip(ax, x_all, titles, x_ranges):
    ax_i.hist(x, bins=Nbins, range=x_range, histtype='step')
    ax_i.set_title(title)
    ymax = ax_i.get_ylim()[1]*1.2
    ax_i.set_ylim(0, ymax)

    names = ['Entries', 'Mean', 'Std. Dev.', 'Std. Dev. in interval']
    values = [ "{:d}".format(len(x)),
               "{:.3f}".format(x.mean()),
               "{:.3f}".format(x.std(ddof=1)),
               "{:.3f}".format(x[(x_range[0] < x) & (x < x_range[1])].std(ddof=1)),
               ]
    ax_i.text(0.02, 0.97, nice_string_output(names, values), family='monospace',
              transform=ax_i.transAxes, fontsize=10, verticalalignment='top')

fig.tight_layout()
if (SavePlots) :
    fig.savefig('InputDistributions.pdf')           # Output file name can be chosen freely.


#----------------------------------------------------------------------------------
# Plot the sums and fit them to a Gaussian:
#----------------------------------------------------------------------------------

xmin, xmax = -4.0, 4.0    # Range used for the histogram and the fit (a reasonable choice).
Nbins = 100

def gauss_extended(x, N, mu, sigma) :
    """Gaussian distribution scaled to N entries (extended PDF)."""
    return N / np.sqrt(2.0 * pi) / sigma * np.exp(-0.5 * (x - mu)**2 / sigma**2)

# Binned (extended) likelihood fit of the sums to a Gaussian:
binned_likelihood = BinnedLH(gauss_extended, x_Sum, bins=Nbins, bound=(xmin, xmax), extended=True)
minuit = Minuit(binned_likelihood, N=Nexperiments, mu=0.0, sigma=1.0, pedantic=False)   # Starting values
minuit.migrad()                                     # Perform the actual fit
fit_N, fit_mu, fit_sigma = minuit.args              # Fitted values of the parameters

fig2, ax2 = plt.subplots(figsize=(10, 6))
y_counts, bin_edges, _ = ax2.hist(x_Sum, bins=Nbins, range=(xmin, xmax), histtype='step')
ax2.set_title('Distribution of sums')

x_centers = 0.5 * (bin_edges[1:] + bin_edges[:-1])  # Bin centers
sy_counts = np.sqrt(y_counts)                       # Poisson uncertainty on each bin count

binwidth = (xmax - xmin) / Nbins
Nscale = binwidth             # Scale between the extended PDF (per unit x) and counts per bin

x_fit = np.linspace(xmin, xmax, 1000)
ax2.plot(x_fit, Nscale * gauss_extended(x_fit, fit_N, fit_mu, fit_sigma), '-')

# Calculate the chi2 value of the fit, comparing histogram and fitted function bin by bin:
chi2_val = 0
N_NotEmptyBin = 0
for x, y, sy in zip(x_centers, y_counts, sy_counts) :
    if (sy > 0) :
        f = Nscale * gauss_extended(x, fit_N, fit_mu, fit_sigma)   # calc the model value
        residual = ( y - f ) / sy       # find the uncertainty-weighted residual
        chi2_val += residual**2         # the chi2-value is the squared residual
        N_NotEmptyBin += 1              # count the bin as non-empty since sy > 0 (and thus y > 0)

# Here we find the number of degrees of freedom, which is the number of
# elements in the fit (non-empty bins) minus the number of fitting parameters:
N_DOF = N_NotEmptyBin - len(minuit.args)

from scipy import stats
chi2_prob = stats.chi2.sf(chi2_val, N_DOF)          # The chi2 probability given N_DOF degrees of freedom

names = ['Entries', 'Mean in interval', 'Std. Dev. in interval', 'Chi2/DOF', 'Prob', 'N', 'mu', 'sigma']   # + minuit.parameters
values = [ "{:d}".format(len(x_Sum)),
           "{:.3f}".format(x_Sum[(xmin < x_Sum) & (x_Sum < xmax)].mean()),
           "{:.3f}".format(x_Sum[(xmin < x_Sum) & (x_Sum < xmax)].std(ddof=1)),
           "{:.3f} / {:d}".format(chi2_val, N_DOF),
           "{:.3f}".format(chi2_prob),
           "{:.1f}".format(fit_N),
           "{:.3f}".format(fit_mu),
           "{:.3f}".format(fit_sigma),
           ]
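
# ----------------------------------------------------------------------------------
# A small standalone cross-check of the quadrature rule described in the header:
# the width of a sum of N independent variables, each with RMS = 1, grows as sqrt(N),
# which is why the sums above were divided by sqrt(Ntotal) to obtain unit width.
# The snippet uses plain uniform numbers only; the names n_terms and n_sums are local
# to this check and not part of the analysis above.
n_terms = 30
n_sums = 10000
u = np.sqrt(12.0) * (r.uniform(size=(n_sums, n_terms)) - 0.5)    # unit-RMS uniform numbers
print("RMS of raw sums:       {:.3f}  (expect ~sqrt({:d}) = {:.3f})".format(
      u.sum(axis=1).std(ddof=1), n_terms, np.sqrt(n_terms)))
print("RMS of sums / sqrt(N): {:.3f}  (expect ~1)".format(
      (u.sum(axis=1) / np.sqrt(n_terms)).std(ddof=1)))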