#!/usr/bin/env python
# ----------------------------------------------------------------------------------- //
#
#  Python macro for introducing plotting and fitting with examples.
#
#  Author:  Troels C. Petersen (NBI)
#           Christian Michelsen (NBI, for Python plots)
#  Date:    17th of November 2017 (latest update)
#
#  Run by:  ./IntroToPlottingAndFitting.py
#
# ----------------------------------------------------------------------------------- //

# First, import the modules you want to use:
from __future__ import print_function, division
import numpy as np                                       # Matlab-like syntax for linear algebra and functions
import matplotlib.pyplot as plt                          # Plots and figures like you know them from Matlab
from iminuit import Minuit                               # The actual fitting tool, better than scipy's
from probfit import BinnedLH, Chi2Regression, Extended   # Helper tools for fitting

plt.close('all')             # Close all open figures

r = np.random                # Random generator
r.seed(42)                   # Set a random seed (but a fixed one)

SavePlots = True             # Save the plots produced (set to False while still testing the code)
verbose = True               # For now, print a lot of output (once you trust your code, switch off)
Nverbose = 10                # But only print a lot for the first 10 random numbers


# =============================================================================
# Initial functions
# =============================================================================

# Function to create a nicely formatted string output for figures:
def nice_string_output(names, values, extra_spacing=2):
    max_values = len(max(values, key=len))
    max_names = len(max(names, key=len))
    string = ""
    for name, value in zip(names, values):
        string += "{0:s} {1:>{spacing}} \n".format(name, value,
                  spacing=extra_spacing + max_values + max_names - len(name))
    return string[:-2]


#----------------------------------------------------------------------------------
# Create the data:
#----------------------------------------------------------------------------------

Npoints = 10000              # Number of random points produced

# Create empty arrays to store the numbers in.
# Notice that we create an empty array first instead of appending to lists, for computational speed:
x_all = np.zeros(Npoints)
y_all = np.zeros(Npoints)

# Loop to get some random values and fill them into the arrays:
for iexp in range(Npoints):
    x_all[iexp] = r.normal() * 0.8 - 0.5
    y_all[iexp] = r.normal() * 1.3 + 0.5
    if (verbose and iexp < Nverbose):
        print("  Histogram input value {0:2d}:   x = {1:6.3f}    y = {2:6.3f}".format(iexp, x_all[iexp], y_all[iexp]))

# Numpy-version (note that this overwrites the arrays filled above with new random numbers):
x_all = r.normal(-0.5, 0.8, Npoints)
y_all = r.normal(0.5, 1.3, Npoints)
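
# -----------------------------------------------------------------------------------
# ADDED SKETCH (not part of the original macro): a quick sanity check of the generated
# data. The sample mean and standard deviation should come out close to the (mu, sigma)
# used above, i.e. roughly (-0.5, 0.8) for x_all and (0.5, 1.3) for y_all.
if verbose:
    print("  Sanity check:  x_all has mean = {0:6.3f}, std = {1:6.3f}".format(x_all.mean(), x_all.std(ddof=1)))
    print("  Sanity check:  y_all has mean = {0:6.3f}, std = {1:6.3f}".format(y_all.mean(), y_all.std(ddof=1)))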

#----------------------------------------------------------------------------------
# Histogram of data (1D):
#----------------------------------------------------------------------------------

# In the following, we want to produce two histograms (here both Gaussian), and plot
# them together in a single plot. This can be done in TWO WAYS!
# The first one is very MatLab-like, and probably also the simplest, but in the longer
# run it has a few drawbacks, while the second way is more Pythonic (and Object Oriented),
# and allows for returning to these plots later in the code.

# General input (for both methods):
Nbins = 100
xmin = -5
xmax = 5

# Method 0 (MatLab-like):
# Create just a single figure and axes:
plt.figure(figsize=(12, 6))      # figsize is in inches

# Create a histogram with Nbins bins in the range from xmin to xmax.
# Choose a step-type histogram with a line width of 2 in the color and label chosen:
plt.hist(x_all, bins=Nbins, range=(xmin, xmax), histtype='step', linewidth=2, label=r'Gaussian ($\mu$ = -0.5)', color='blue')
plt.hist(y_all, bins=Nbins, range=(xmin, xmax), histtype='step', linewidth=2, label=r'Gaussian ($\mu$ = +0.5)', color='red')
plt.xlabel("Random numbers")                     # The label of the x axis
plt.ylabel("Frequency")                          # The label of the y axis
plt.title("Distribution of Gaussian numbers")    # The title of the plot
plt.legend(loc='best')                           # Could also be e.g. loc='upper right'

# Method 1 (Python-like):
# Create just a single figure and axes:
fig, ax = plt.subplots(figsize=(12, 6))          # figsize is in inches

# Create a histogram and give it a name.
# It has Nbins bins in the range from xmin to xmax.
# Choose a step-type histogram with a line width of 2 in the color and label chosen:
hist1 = ax.hist(x_all, bins=Nbins, range=(xmin, xmax), histtype='step', linewidth=2, label=r'Gaussian ($\mu$ = -0.5)', color='blue')
hist2 = ax.hist(y_all, bins=Nbins, range=(xmin, xmax), histtype='step', linewidth=2, label=r'Gaussian ($\mu$ = +0.5)', color='red')
ax.set_xlabel("Random numbers")                  # The label of the x axis
ax.set_ylabel("Frequency")                       # The label of the y axis
ax.set_title("Distribution of Gaussian numbers") # The title of the plot
ax.legend(loc='best')                            # Could also be e.g. loc='upper right'


#----------------------------------------------------------------------------------
# Fit to the data / histogram (1D):
#----------------------------------------------------------------------------------

# Define your PDF / model:
def gauss_pdf(x, mu, sigma):
    """Normalized Gaussian"""
    return 1 / np.sqrt(2 * np.pi) / sigma * np.exp(-(x - mu)**2 / 2. / sigma**2)

def gauss_extended(x, N, mu, sigma):
    """Non-normalized (extended) Gaussian"""
    return N * gauss_pdf(x, mu, sigma)
# Could also be written as:
# gauss_extended = Extended(gauss_pdf)

# Make a Binned Likelihood object based on the fit function gauss_extended and
# the original data x_all. Use the same bins and bounds as the original histogram.
# extended=True:  the number of events N is a fit variable as well
# extended=False: the number of events N is fixed
binned_likelihood = BinnedLH(gauss_extended, x_all, bins=Nbins, bound=(xmin, xmax), extended=True)

# Be ready to fit the LH object with the given starting parameters. Other options are:
#   x=2               set initial value of x to 2
#   limit_x=(-1, 1)   set the allowed range for x
#   y=2, fix_y=True   fix the value of y to 2
minuit = Minuit(binned_likelihood, pedantic=False, mu=-1, sigma=1, N=1000)

minuit.migrad()      # Perform the actual fit
# minuit_output = [minuit.get_fmin(), minuit.get_param_states()]   # Save the output parameters in case needed

fit_N, fit_mu, fit_sigma = minuit.args     # The fitted values of the parameters
for name in minuit.parameters:
    print("Fit value: {0} = {1:.5f} +/- {2:.5f}".format(name, minuit.values[name], minuit.errors[name]))

LLH_value = minuit.fval                    # The (log-)likelihood value at the minimum

binwidth = (xmax - xmin) / Nbins           # The scale factor between histogram and fit, taking e.g. the bin width into account

x_fit = np.linspace(-5, 5, 1000)           # Create the x-axis for the plot of the fitted function
y_fit = binwidth * gauss_extended(x_fit, fit_N, fit_mu, fit_sigma)   # The fitted function

ax.plot(x_fit, y_fit, '-', color='blue', label='Fit with Gaussian to x')
ax.legend(loc='upper right')
# Note, we refer to the old plot "ax". Had we done another plot in between,
# we would not have been able to plot on top of the old figure with the MatLab syntax.
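
# -----------------------------------------------------------------------------------
# ADDED SKETCH (not part of the original macro): as a cross check of the binned
# likelihood fit above, the same data can be fitted with an UNBINNED likelihood, which
# uses every single value in x_all instead of the bin contents. This assumes probfit's
# UnbinnedLH class and the same (old) iminuit interface used above.
from probfit import UnbinnedLH

unbinned_likelihood = UnbinnedLH(gauss_pdf, x_all)    # gauss_pdf is normalized, so no N here
minuit_unbinned = Minuit(unbinned_likelihood, pedantic=False, mu=-1, sigma=1)
minuit_unbinned.migrad()
if verbose:
    for name in minuit_unbinned.parameters:
        print("Unbinned fit value: {0} = {1:.5f} +/- {2:.5f}".format(
            name, minuit_unbinned.values[name], minuit_unbinned.errors[name]))
# The fitted mu and sigma should agree with the binned fit above within the uncertainties.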

## Note, had we wanted to plot either of the histograms with error bars:
# y, bin_edges = np.histogram(x_all, bins=Nbins, range=(xmin, xmax))
# x = 0.5*(bin_edges[1:] + bin_edges[:-1])
# sy = np.sqrt(y)
# hist1 = ax.errorbar(x, y, sy, fmt='.', label='Prime number distribution')


#----------------------------------------------------------------------------------
# Get statistics:
#----------------------------------------------------------------------------------

# In this section we calculate the needed statistical values for the fit.
# First we extract the x- and y-values of the histogram and assume
# Poisson statistics to calculate the uncertainties:
entries, bin_edges, _ = hist1
bin_centers = 0.5*(bin_edges[1:] + bin_edges[:-1])

hist1_x, hist1_y = bin_centers, entries
hist1_sy = np.sqrt(hist1_y)

# Here we calculate the chi2 value of the fit and the number of non-empty bins:
chi2_val = 0
N_NotEmptyBin = 0

for x, y, sy in zip(hist1_x, hist1_y, hist1_sy):
    if y > 0:
        f = binwidth * gauss_extended(x, fit_N, fit_mu, fit_sigma)   # Calculate the model value
        residual = (y - f) / sy      # Find the uncertainty-weighted residual
        chi2_val += residual**2      # The chi2-value is the sum of squared residuals
        N_NotEmptyBin += 1           # Count the bin as non-empty, since sy > 0 (and thus y > 0)

# Here we find the number of degrees of freedom, which is the number of
# data points in the fit (non-empty bins) minus the number of fit parameters:
N_DOF = N_NotEmptyBin - len(minuit.args)

from scipy import stats
chi2_prob = stats.chi2.sf(chi2_val, N_DOF)    # The chi2 probability given N_DOF degrees of freedom

names = ['Entries', 'Mean', 'STD Dev.', 'Chi2/ndf', 'Prob']
values = ["{:d}".format(len(x_all)),
          "{:.3f}".format(x_all.mean()),
          "{:.3f}".format(x_all.std(ddof=1)),
          "{:.3f} / {:d}".format(chi2_val, N_DOF),
          "{:.3f}".format(chi2_prob),
          ]

ax.text(0.02, 0.95, nice_string_output(names, values), family='monospace',
        transform=ax.transAxes, fontsize=12, verticalalignment='top')
# Note, we refer to the old plot "ax". Had we done another plot in between,
# we would not have been able to plot on top of the old figure with the MatLab syntax.

plt.tight_layout()        # Remove outer whitespace
plt.show(block=False)     # In a terminal, continue the code after showing the figure

if SavePlots:
    fig.savefig('Histogram.pdf', dpi=600)
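
# -----------------------------------------------------------------------------------
# ADDED SKETCH (not part of the original macro): the chi2 computed in the loop above can
# also be obtained in a single vectorized expression, masking out the empty bins.
# This is purely a cross check of the loop-based calculation.
mask = hist1_y > 0
f_model = binwidth * gauss_extended(hist1_x[mask], fit_N, fit_mu, fit_sigma)
chi2_val_numpy = np.sum(((hist1_y[mask] - f_model) / hist1_sy[mask])**2)
if verbose:
    print("Chi2 cross check:  loop = {0:.3f}    numpy = {1:.3f}".format(chi2_val, chi2_val_numpy))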

#----------------------------------------------------------------------------------
# Graph with fit (1D):
#----------------------------------------------------------------------------------

# Define points for a graph with errors:
Ndata = 51

# Create empty arrays to store the numbers in:
x = np.zeros(Ndata)
y = np.zeros(Ndata)
sx = np.zeros(Ndata)
sy = np.zeros(Ndata)

# Having defined four arrays filled with zeros, below we fill them with values:
for i in range(Ndata):
    x[i] = -1.25 + 0.05*i
    y[i] = -2.0 - 2.0*x[i] + 1.0*x[i]*np.sin(x[i]) + 2.0*x[i]**3
    if (x[i] > 0.0):
        y[i] += 0.5                # Put in a discontinuity by hand!
    y[i] += r.normal(0., 0.1)      # Adding a Gaussian variation of 0.1
    sx[i] = 0.0                    # Deciding that x is precise!
    sy[i] = 0.1                    # Writing down the error
    if (verbose and i < Nverbose):
        print("  Graph point {0:2d}:   x = {1:6.3f} +- {2:5.3f}    y = {3:6.3f} +- {4:5.3f}".format(i, x[i], sx[i], y[i], sy[i]))

# Numpy-version:
x = np.linspace(-1.25, -1.25 + 0.05*(Ndata-1), Ndata)
y = -2.0 - 2.0*x + 1.0*x*np.sin(x) + 2.0*x**3
y[x > 0.0] += 0.5
y += r.normal(0., 0.1, Ndata)
sx = np.zeros(Ndata)
sy = 0.1*np.ones(Ndata)

# Plot the data.
# Notice that we give the plot objects new names, so we can differentiate between the old and the new plots:
fig2, ax2 = plt.subplots(figsize=(12, 6))
ax2.errorbar(x, y, xerr=sx, yerr=sy, fmt='.')
ax2.set_title("Fit of a graph")
ax2.set_xlabel("X")
ax2.set_ylabel("Y")
ax2.set_xlim(-1.3, 1.3)

# Defining an external function (see below for a more advanced function):
def func_simple(x, p0, p1, p2, p3):
    return p0 + p1*x + p2*x*np.sin(x) + p3*x*x*x

chi2_object_simple = Chi2Regression(func_simple, x, y, sy)
minuit_simple = Minuit(chi2_object_simple, pedantic=False, p0=0., p1=2.0, p2=0.0, p3=2.0)
minuit_simple.migrad()     # Fit
p0, p1, p2, p3 = minuit_simple.args

print("Simple fit")
for name in minuit_simple.parameters:
    print("Fit value: {0} = {1:.5f} +/- {2:.5f}".format(name, minuit_simple.values[name], minuit_simple.errors[name]))

x_fit = np.linspace(-1.3, 1.3, 1000)
y_fit_simple = func_simple(x_fit, p0, p1, p2, p3)
ax2.plot(x_fit, y_fit_simple, '-')

# Defining an external function (which can include "if" statements and everything else!):
def func_advanced(x, p0, p1, p2, p3, p4):
    if x > 0:
        return p0 + p1*x + p2*x*np.sin(x) + p3*x**3 + p4
    else:
        return p0 + p1*x + p2*x*np.sin(x) + p3*x**3

# np.vectorize transforms a non-vectorized function into a vectorized one,
# such that func_advanced_vec allows x to be a numpy array instead of just a scalar:
func_advanced_vec = np.vectorize(func_advanced)

chi2_object_advanced = Chi2Regression(func_advanced, x, y, sy)
minuit_advanced = Minuit(chi2_object_advanced, pedantic=False, p0=0., p1=2.0, p2=0.0, p3=2.0, p4=0.5)
minuit_advanced.migrad()   # Fit
p0, p1, p2, p3, p4 = minuit_advanced.args

print("Advanced fit")
for name in minuit_advanced.parameters:
    print("Fit value: {0} = {1:.5f} +/- {2:.5f}".format(name, minuit_advanced.values[name], minuit_advanced.errors[name]))

x_fit = np.linspace(-1.3, 1.3, 1000)
y_fit_advanced = func_advanced_vec(x_fit, p0, p1, p2, p3, p4)
ax2.plot(x_fit, y_fit_advanced, '-')

# If you have many parameters and don't want to write out all the names,
# you can write it the following way, using argument unpacking (the asterisk *):
#   p = minuit_advanced.args                        # Save all the parameters as the tuple p
#   y_fit_advanced = func_advanced_vec(x_fit, *p)   # Unpack p as the input arguments

chi2_val = 0
for x_i, y_i, sy_i in zip(x, y, sy):
    f = func_advanced(x_i, p0, p1, p2, p3, p4)
    residual = (y_i - f) / sy_i
    chi2_val += residual**2

DOF = Ndata - len(minuit_advanced.args)

from scipy import stats
chi2_prob = stats.chi2.sf(chi2_val, DOF)
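
# -----------------------------------------------------------------------------------
# ADDED SKETCH (not part of the original macro): for comparison, the chi2 of the simple
# fit (without the discontinuity parameter p4) can be computed in the same way. Since
# the data contains a step at x = 0, the simple fit should give a noticeably worse chi2.
chi2_val_simple = np.sum(((y - func_simple(x, *minuit_simple.args)) / sy)**2)
DOF_simple = Ndata - len(minuit_simple.args)
if verbose:
    print("Chi2 comparison:  simple fit = {0:.1f} / {1:d}    advanced fit = {2:.1f} / {3:d}".format(
        chi2_val_simple, DOF_simple, chi2_val, DOF))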

names = ['Entries', 'Chi2/ndf', 'Prob', 'p0', 'p1', 'p2', 'p3', 'p4']
values = ["{:d}".format(Ndata),
          "{:.3f} / {:d}".format(chi2_val, DOF),
          "{:.3f}".format(chi2_prob),
          "{:.3f} +/- {:.3f}".format(minuit_advanced.values['p0'], minuit_advanced.errors['p0']),
          "{:.3f} +/- {:.3f}".format(minuit_advanced.values['p1'], minuit_advanced.errors['p1']),
          "{:.3f} +/- {:.3f}".format(minuit_advanced.values['p2'], minuit_advanced.errors['p2']),
          "{:.3f} +/- {:.3f}".format(minuit_advanced.values['p3'], minuit_advanced.errors['p3']),
          "{:.3f} +/- {:.3f}".format(minuit_advanced.values['p4'], minuit_advanced.errors['p4']),
          ]

# Place a text box in the upper left corner, in axes coordinates:
ax2.text(0.02, 0.95, nice_string_output(names, values), family='monospace',
         transform=ax2.transAxes, fontsize=12, verticalalignment='top')

plt.tight_layout()
plt.show(block=False)

if SavePlots:
    fig2.savefig('Graph.pdf', dpi=600)

# If you want to execute your scripts both in Spyder and normally through the terminal,
# you can use the snippet below to pause scripts running in the terminal,
# so your figures don't immediately close.
# If you are only going to use Spyder, just remove or ignore the part below.
try:
    __IPYTHON__
except NameError:
    try:
        raw_input('Press Enter to exit')   # Python 2
    except NameError:
        input('Press Enter to exit')       # Python 3