#!/usr/bin/env python
# ----------------------------------------------------------------------------------- #
#
#  Python macro for illustrating error propagation using random Gaussian numbers.
#
#  The example is based on FIRST doing the error propagation analytically,
#  and then verifying it by running a so-called Monte-Carlo (MC) program.
#
#  For more information on error propagation, see:
#    R. J. Barlow:    page 48-61
#    P. R. Bevington: page 36-48
#
#  Author: Troels C. Petersen (NBI)
#  Email:  petersen@nbi.dk
#  Date:   12th of November 2017
#
# ----------------------------------------------------------------------------------- #
# ----------------------------------------------------------------------------------- #
#
#  DO THE FOLLOWING ANALYTICAL EXERCISE FIRST!!!
#
#  1) A class of students estimates by eye that the length of the table in Auditorium A
#     is L = 3.5 +- 0.4 m, and that the width is W = 0.8 +- 0.2 m.
#
#     Assuming that there is no correlation between these two measurements, calculate
#     ANALYTICALLY what the circumference, area, and diagonal length are, including
#     (propagated) uncertainties.
#     Repeat the calculation, given that the correlation is rho(L,W) = 0.5.
#
#  NOTE: Do the above analytical calculation before you continue below!
#
# ----------------------------------------------------------------------------------- #

from __future__ import division, print_function
import numpy as np
import matplotlib.pyplot as plt
from iminuit import Minuit
from probfit import Chi2Regression

plt.close('all')

# Function to create a nice string output for the figure text boxes:
def nice_string_output(names, values, extra_spacing=2):
    max_values = len(max(values, key=len))
    max_names = len(max(names, key=len))
    string = ""
    for name, value in zip(names, values):
        string += "{0:s} {1:>{spacing}} \n".format(name, value,
                                                   spacing=extra_spacing + max_values + max_names - len(name))
    return string[:-2]


#----------------------------------------------------------------------------------
# Error propagation
#----------------------------------------------------------------------------------

# Set parameters:
Nexp = 10000           # Number of experiments
pi = np.pi
SavePlots = False

r = np.random
r.seed(42)

#----------------------------------------------------------------------------------
# Define parameters for two random numbers (Gaussianly distributed):
#----------------------------------------------------------------------------------

mu1   = 3.5
sig1  = 0.4
mu2   = 0.8
sig2  = 0.2
rho12 = 0.0            # Correlation parameter!

if (rho12 < -1.0 or rho12 > 1.0):
    print("ERROR: Correlation factor not in interval [-1,1], as it is {:6.2f}".format(rho12))
    exit()

# Calculate the numbers that allow the transform from the uncorrelated variables u and v
# to the correlated random numbers x1 and x2 below (see Barlow page 42-44 for the method):
theta = 0.5 * np.arctan(2.0 * rho12 * sig1 * sig2 / (np.square(sig1) - np.square(sig2)))
sigu = np.sqrt(np.abs((np.square(sig1*np.cos(theta)) - np.square(sig2*np.sin(theta))) / (np.square(np.cos(theta)) - np.square(np.sin(theta)))))
sigv = np.sqrt(np.abs((np.square(sig2*np.cos(theta)) - np.square(sig1*np.sin(theta))) / (np.square(np.cos(theta)) - np.square(np.sin(theta)))))

# This is nothing more than a (rotation) matrix multiplication written out!
# Note that the absolute value is taken before the square root to avoid sqrt(x) with x < 0.
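
#----------------------------------------------------------------------------------
# OPTIONAL CROSS-CHECK (added for illustration; not part of the original exercise):
# The rotation construction above should be equivalent to drawing directly from a 2D
# Gaussian with the covariance matrix built from sig1, sig2, and rho12. A minimal
# sketch, assuming only numpy, which draws such a sample with a separate generator
# (so the main random sequence is untouched) and prints its empirical correlation:
cov_check = np.array([[sig1**2,         rho12*sig1*sig2],
                      [rho12*sig1*sig2, sig2**2        ]])
r_check = np.random.RandomState(43)
sample_check = r_check.multivariate_normal([mu1, mu2], cov_check, size=1000)
print("Cross-check: empirical correlation of a direct 2D-Gaussian sample: {:5.2f}  (target: {:5.2f})".format(
      np.corrcoef(sample_check[:, 0], sample_check[:, 1])[0, 1], rho12))
#----------------------------------------------------------------------------------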
#----------------------------------------------------------------------------------
# Loop over process:
#----------------------------------------------------------------------------------

x1_all  = np.zeros(Nexp)
x2_all  = np.zeros_like(x1_all)
x12_all = np.zeros((Nexp, 2))
y_all   = np.zeros_like(x1_all)

for i in range(Nexp):

    # Get (uncorrelated) random Gaussian numbers u and v:
    u = r.normal(0.0, sigu)
    v = r.normal(0.0, sigv)

    # Transform into (possibly) correlated random Gaussian numbers x1 and x2:
    x1 = mu1 + np.cos(theta)*u - np.sin(theta)*v
    x2 = mu2 + np.sin(theta)*u + np.cos(theta)*v
    x1_all[i] = x1
    x2_all[i] = x2
    x12_all[i, :] = x1, x2

    # Calculate y (circumference, area or diagonal):
    y = x1 - x2            # Just nonsense formula! Write the appropriate formula yourself!
    y_all[i] = y

    if (i < 5):
        print("Gaussian: x1 = {:4.2f}   x2 = {:4.2f}  ->  y = {:5.2f}".format(x1, x2, y))


#----------------------------------------------------------------------------------
# Plot both input distribution and resulting histogram on screen:
#----------------------------------------------------------------------------------

fig, ax = plt.subplots(figsize=(10, 6))
counts, xedges, yedges, im = ax.hist2d(x12_all[:, 0], x12_all[:, 1], bins=[120, 80],
                                       range=[[0.0, 6.0], [-1.0, 3.0]], cmin=1)
ax.set_aspect('equal')     # NOTE: This forces the x- and y-axis to have the SAME scale!!!
fig.colorbar(im)
ax.set_title('Histogram of lengths (x) and widths (y)')
ax.set_xlabel('x')
ax.set_ylabel('y')

names  = ['Entries', 'Mean x', 'Mean y', 'STD Dev. x', 'STD Dev. y']
values = ["{:d}".format(len(x12_all)),
          "{:.3f}".format(x12_all[:, 0].mean()),
          "{:.3f}".format(x12_all[:, 1].mean()),
          "{:.3f}".format(x12_all[:, 0].std(ddof=1)),
          "{:.3f}".format(x12_all[:, 1].std(ddof=1)),
          ]

ax.plot([0.0, 6.0], [0.0, 0.0], "--k")   # NOTE: This draws a line between x-values [x1, x2] and y-values [y1, y2], dashed ("--") and black ("k")
ax.text(0.05, 0.95, nice_string_output(names, values), family='monospace',
        transform=ax.transAxes, fontsize=10, verticalalignment='top')

plt.tight_layout()
plt.show(block=False)

if (SavePlots):
    fig.savefig("Dist_2Dgauss.pdf")


# Making a new window (canvas):
# --------------------------------------------------------------------
def gaussian(x, N, mu, sigma):
    return N / (sigma*np.sqrt(2*np.pi)) * np.exp(-0.5 * (x-mu)**2 / sigma**2)
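
#----------------------------------------------------------------------------------
# OPTIONAL CROSS-CHECK (added for illustration; not part of the original exercise):
# For the placeholder formula y = x1 - x2 used in the loop above, linear error propagation
# gives sigma_y^2 = (dy/dx1)^2*sig1^2 + (dy/dx2)^2*sig2^2 + 2*rho12*(dy/dx1)*(dy/dx2)*sig1*sig2,
# with dy/dx1 = +1 and dy/dx2 = -1. A minimal sketch comparing this to the MC spread
# (remember to update the derivatives if you change the formula for y):
sigma_y_analytical = np.sqrt(sig1**2 + sig2**2 - 2.0*rho12*sig1*sig2)
print("Analytical propagation for y = x1 - x2: sigma_y = {:5.3f}   MC spread: {:5.3f}".format(
      sigma_y_analytical, y_all.std(ddof=1)))
#----------------------------------------------------------------------------------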
xmin, xmax = 0.0, 5.0

fig2, ax2 = plt.subplots(figsize=(10, 6))
counts, bin_edges, _ = ax2.hist(y_all, 100, range=(xmin, xmax), histtype='step')
bin_centers = (bin_edges[1:] + bin_edges[:-1])/2
s_counts = np.sqrt(counts)            # Poisson uncertainty on the bin counts
# ax2.errorbar(bin_centers[counts>0], counts[counts>0], s_counts[counts>0], fmt='.')

# Chi2 fit of a Gaussian to the (non-empty) bins:
Chi2_object = Chi2Regression(gaussian, bin_centers[counts > 0], counts[counts > 0], s_counts[counts > 0])
minuit = Minuit(Chi2_object, pedantic=False, N=500, mu=3, sigma=1, limit_sigma=(0, 10000), print_level=0)
minuit.migrad()                       # Perform the actual fit

for name in minuit.parameters:
    print("Fit value: {0} = {1:.5f} +/- {2:.5f}".format(name, minuit.values[name], minuit.errors[name]))

Chi2 = minuit.fval
Nvar = 3                              # Number of fit parameters (N, mu, and sigma)
Ndof = len(counts[counts > 0]) - Nvar # Number of degrees of freedom

from scipy import stats
Prob = stats.chi2.sf(Chi2, Ndof)      # The chi2 probability given Ndof degrees of freedom

xaxis = np.linspace(xmin, xmax, 1000)
yaxis = gaussian(xaxis, *minuit.args)
ax2.plot(xaxis, yaxis)

names  = ['Entries', 'Mean', 'STD Dev.', 'Chi2/ndf', 'Prob', 'N', 'mu', 'sigma']
values = ["{:d}".format(len(y_all)),
          "{:.3f}".format(y_all.mean()),
          "{:.3f}".format(y_all.std(ddof=1)),
          "{:.3f} / {:d}".format(Chi2, Ndof),
          "{:.3f}".format(Prob),
          "{:.3f} +/- {:.3f}".format(minuit.values['N'], minuit.errors['N']),
          "{:.3f} +/- {:.3f}".format(minuit.values['mu'], minuit.errors['mu']),
          "{:.3f} +/- {:.3f}".format(minuit.values['sigma'], minuit.errors['sigma']),
          ]

ax2.text(0.65, 0.95, nice_string_output(names, values), family='monospace',
         transform=ax2.transAxes, fontsize=10, verticalalignment='top')

plt.tight_layout()
plt.show(block=False)

if (SavePlots):
    fig2.savefig("Dist_ErrorProp.pdf")

# Finally, ensure that the program does not terminate (and the plots disappear) before you press Enter:
try:
    __IPYTHON__
except NameError:
    try:
        raw_input('Press Enter to exit')   # Python 2
    except NameError:
        input('Press Enter to exit')       # Python 3


#----------------------------------------------------------------------------------
#
# Questions:
# ----------
#  0) First solve the problem of obtaining the circumference, area, and diagonal with
#     uncertainties ANALYTICALLY.
#
#  1) Now look at the program, and assure yourself that you understand what is going on.
#     Put in the correct expression for y in terms of x1=L and x2=W in order to calculate the
#     circumference, area, and diagonal length, and run the program.
#     Does the output correspond well to the results you expected from your calculations to begin with?
#
#  2) Imagine that you wanted to know the central value and uncertainty of y1 and y2, given the
#     same PDFs for x1=L and x2=W as above:
#       y1 = log(sqr(x1*tan(x2)) + sqrt((x1-x2)/(cos(x2) + 1.0 + x1)))
#       y2 = (1.1 + sin(20*x1)) * fabs(1.0/x2)
#     Get the central value of y, and see if you can quickly differentiate this with
#     respect to x1 and x2, and thus predict what uncertainty to expect for y using
#     the error propagation formula. It is (for once) OK to give up :-)
#     Next, try to estimate the central value and uncertainty using random numbers
#     like above - do you trust this result more? And are the distributions Gaussian?
#
#
# Advanced questions:
# -------------------
#  1) Try to generate x1 and x2 with a quadratic correlation, and see that despite
#     not having any linear correlation, the result for the circumference, area, and diagonal
#     length is still affected (a small sketch is given below).
#
#----------------------------------------------------------------------------------
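
#----------------------------------------------------------------------------------
# SKETCH for advanced question 1 (added for illustration; not part of the original macro).
# One simple way to obtain a purely quadratic (i.e. non-linear) dependence is to let x2
# depend on (x1 - mu1)**2 plus independent noise; the linear correlation coefficient of
# such a sample is close to zero, yet y(x1, x2) is still affected. Uncomment to try it;
# note that the 0.5 scale factor below is an arbitrary choice:
#
#   x1_quad = r.normal(mu1, sig1, Nexp)
#   x2_quad = mu2 + 0.5*(x1_quad - mu1)**2 + r.normal(0.0, sig2, Nexp)
#   print("Linear correlation of the quadratically related sample: {:5.2f}".format(
#         np.corrcoef(x1_quad, x2_quad)[0, 1]))
#----------------------------------------------------------------------------------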