#!/usr/bin/env python # # ----------------------------------------------------------------------------------- # # # Python macro for illustrating test statistics among them the Kolmogorov-Smirnov test. # # The Kolmogorov-Smirnov test (KS-test) is a general test to evaluate if two distributions # in 1D are the same. This program applies both a binned and an unbinned KS test, and # compares it to a Chi2 test and a simple comparison of means. # # The distributions compared are two unit Gaussians, where one is then modified by # changing: # * Mean # * Width # * Normalisation # The sensitivity of each test is then considered for each of these changes. # # References: # Barlow: 155-156 # http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test # # Author: Troels C. Petersen (NBI) # Email: petersen@nbi.dk # Date: 2nd of December 2017 # # ----------------------------------------------------------------------------------- # from __future__ import print_function, division # import numpy as np # Matlab like syntax for linear algebra and functions import matplotlib.pyplot as plt # Plots and figures like you know them from Matlab from iminuit import Minuit # The actual fitting tool, better than scipy's from probfit import BinnedLH, Chi2Regression, Extended # Helper tool for fitting from scipy.special import erfc from scipy import stats plt.close('all') # to close all open figures r = np.random # Random generator r.seed(42) # Set a random seed (but a fixed one) SavePlots = False verbose = True # Function to create a nice string output for figures: def nice_string_output(names, values, extra_spacing = 2): max_values = len(max(values, key=len)) max_names = len(max(names, key=len)) string = "" for name, value in zip(names, values): string += "{0:s} {1:>{spacing}} \n".format(name, value, spacing = extra_spacing + max_values + max_names - len(name)) return string[:-2] def ax_text(x, ax, posx, posy): names = ['Entries', 'Mean', 'STD Dev.'] values = ["{:d}".format(len(x)), 
"{:.4f}".format(x.mean()), "{:.4f}".format(x.std(ddof=1)), ] ax.text(posx, posy, nice_string_output(names, values), family='monospace', transform=ax.transAxes, fontsize=10, verticalalignment='top') # ----------------------------------------------------------------------------------- # # Generate and fit data: # ----------------------------------------------------------------------------------- # # How many experiments, and how many events in each: Nexp = 1 NeventsA = 1000 NeventsB = 1000 # Define the two Gaussians to be generated: dist_meanA = 0.0 dist_widthA = 1.0 dist_meanB = 0.0 dist_widthB = 1.0 Nbins = 500 xmin, xmax = -5, 5 allpMean = np.zeros(Nexp) allpCSbinned = np.zeros(Nexp) allpKSbinned = np.zeros(Nexp) allpKS = np.zeros(Nexp) # Loop over how many times we want to run the experiment: for iexp in range (Nexp) : if (iexp%100 == 0) : print(" Got to experiment number: ", iexp) # Generate data: # -------------- xA_array = r.normal(dist_meanA, dist_widthA, NeventsA) xB_array = r.normal(dist_meanB, dist_widthB, NeventsB) # Calculate mean and error on mean: # --------------------------------- meanA, widthA = np.mean(xA_array), xA_array.std(ddof=1) meanB, widthB = np.mean(xB_array), xB_array.std(ddof=1) emeanA = widthA / np.sqrt(len(xA_array)) emeanB = widthB / np.sqrt(len(xB_array)) # Test if there is a difference in the mean: # ------------------------------------------ dMean = meanA - meanB dMeanError = np.sqrt(emeanA**2 + emeanB**2) nSigma_meanAB = dMean / dMeanError # Turn a number of sigmas into a probability using the error function! pMean = erfc(dMean / dMeanError / np.sqrt(2.0)) / 2.0 pMean_v2 = 1 - stats.norm(0, 1).cdf(nSigma_meanAB) # Alternative calculation of the same thing! 
allpMean[iexp] = pMean # Binned Chi2 and Kolmogorov-Smirnov Test: # ------------------------------------------ binnedA, binnedAEdges = np.histogram(xA_array, bins=Nbins, range=(xmin, xmax)) binnedACenters = 0.5*(binnedAEdges[1:] + binnedAEdges[:-1]) binnedB, binnedBEdges = np.histogram(xB_array, bins=Nbins, range=(xmin, xmax)) binnedBCenters = 0.5*(binnedBEdges[1:] + binnedBEdges[:-1]) cumsumA = np.cumsum(binnedA) cumsumA = cumsumA / np.max(cumsumA) cumsumB = np.cumsum(binnedB) cumsumB = cumsumB / np.max(cumsumB) mask = ((binnedA > 0) | (binnedB > 0)) # https://arxiv.org/pdf/physics/0605123.pdf pCSbinned = stats.chi2_contingency([binnedA[mask], binnedB[mask]])[1] pKSbinned = stats.ks_2samp(np.repeat(binnedACenters[mask], binnedA[mask]), np.repeat(binnedBCenters[mask], binnedB[mask]))[1] allpCSbinned[iexp] = pCSbinned allpKSbinned[iexp] = pKSbinned # Unbinned Kolmogorov-Smirnov test on arrays: pKS = stats.ks_2samp(xA_array, xB_array)[1] allpKS[iexp] = pKS if (verbose and iexp < 10) : print(" {:4d}: pMean: {:7.5f} pCSbinned: {:7.5f} pKSbinned: {:7.5f} pKS: {:7.5f}".format(iexp, pMean, pCSbinned, pKSbinned, pKS)) # In case one wants to plot the distribution for visual inspection: if (iexp == 0) : # Figure for the two distributions, A and B, in the first experiment: fig1, ax1 = plt.subplots(nrows=2, figsize=(8, 10)) ax1[0].hist(xA_array, Nbins, (xmin, xmax), histtype='step') ax1[0].set_title('Histogram A') ax1[0].set_xlabel('A') ax1[0].set_ylabel('Frequency') ax_text(xA_array, ax1[0], 0.02, 0.35) ax1[1].hist(xB_array, Nbins, (xmin, xmax), histtype='step') ax1[1].set_title('Histogram B') ax1[1].set_xlabel('B') ax1[1].set_ylabel('Frequency') ax_text(xB_array, ax1[1], 0.02, 0.35) fig1.tight_layout() plt.show(block=False) # ------------------------------------------------------------------ # # Show the distribution of fitting results: # ------------------------------------------------------------------ # Nbins = 25 if (Nexp > 1) : fig2, ax2 = plt.subplots(nrows=4, 
figsize=(8, 10)) ax2[0].hist(allpMean, Nbins, (0, 1), histtype='step') ax2[0].set_title('Histogram, probability mu') ax2[0].set_xlabel('p') ax2[0].set_ylabel('Counts') ax2[0].set_xlim(0, 1) ax_text(allpMean, ax2[0], 0.02, 0.35) ax2[1].hist(allpCSbinned, Nbins, (0, 1), histtype='step') ax2[1].set_title('Histogram, probability chi2 binned') ax2[1].set_xlabel('p') ax2[1].set_ylabel('Counts') ax2[1].set_xlim(0, 1) ax_text(allpCSbinned, ax2[1], 0.78, 0.9) ax2[2].hist(allpKSbinned, Nbins, (0, 1), histtype='step') ax2[2].set_title('Histogram, probability Kolmogorov binned') ax2[2].set_xlabel('p') ax2[2].set_ylabel('Counts') ax2[2].set_xlim(0, 1) ax_text(allpKSbinned, ax2[2], 0.02, 0.9) ax2[3].hist(allpKS, Nbins, (0, 1), histtype='step') ax2[3].set_title('Histogram, probability Kolmogorov unbinned') ax2[3].set_xlabel('p') ax2[3].set_ylabel('Counts') ax2[3].set_xlim(0, 1) ax_text(allpKS, ax2[3], 0.78, 0.9) fig2.tight_layout() plt.show(block=False) if SavePlots: fig2.savefig('TestDist.pdf', dpi=600) try: __IPYTHON__ except: raw_input('Press Enter to exit') # ---------------------------------------------------------------------------------- # ---------------------------------------------------------------------------------- # # Questions: # ---------- # 1) First run the program to display the two distributions A and B, when # - They are the same. # - The mean of A is increased. # - The width of A is enlarged. # - The normalisation of A is increased. # Get a feel for how much you need to change the distribution, before you can # by eye see that they are not the same. I.e. can you see any difference, if # meanA = 0.1? Or how about 0.2? How do you quantify this and when do you start # to doubt? And how about widthA = 1.1? Or 1.2? Again, can you see it by eye? # Finally, try to put 1050 events into B. Is that visible? How about 1100? 
#
#    Could you, for the test of the means, have calculated how much of a change in the
#    mean is needed for a difference to be statistically significant? Do so, and see if
#    it somewhat matches your guess/estimate from above!
#
# 2) Now run the tests 1000 times, where A and B are unit Gaussians and thus identical.
#    How should the distributions of the test probabilities come out? And is this the
#    case, approximately?
#
#    As it happens, the binning has a small impact on the KS-probability:
#    From: http://www-d0.fnal.gov/~yinh/worknote/root/k-test.txt
#    // 1. The value of PROB should not be expected to have exactly the correct
#    // distribution for binned data.
#    // 2. The user is responsible for seeing to it that the bin widths are
#    // small compared with any physical phenomena of interest.
#    // 3. The effect of binning (if any) is always to make the value of PROB
#    // slightly too big. That is, setting an acceptance criterion of (PROB > 0.05)
#    // will assure that at most 5% of truly compatible histograms are rejected,
#    // and usually somewhat less.
#
# 3) Repeat the changes in question 1), and see which of the tests "reacts" most to these
#    modifications.
#    How much of a change in the mean is required for 95% of the tests (of each kind)
#    to give a probability below 5%? How much is required for the width? And the norm?
#
# 4) Try to test different distributions than the Gaussian one (e.g. exponential,
#    uniform, etc.), and see how the tests perform.
#
# NOTE: The unbinned Kolmogorov-Smirnov test has the great advantage that it can handle
#       ANY distribution (even the Cauchy distribution - remind yourself of that one!).
#       The reason is that it doesn't care about any PDF, nor how far out an outlier is.
#
# Advanced:
# 5) Obviously, the test of the means is not sensitive to a change in the width.
#    Make such a test yourself by calculating the widths and the uncertainty on the
#    widths (or perhaps try the F-test!).
# Note that in a (unit) Gaussian the uncertainty on the width is of the same order # as that of the means! # # Very advanced: # 6) Implement in ROOT the following tests: # - Lilliefors test # - Shapiro-Wilk test # - Anderson-Darling test # - Cramer-von-Mises test # - Jarque-Bera test # - Kuiper's test # - Mann-Whitney-Wilcoxon test # - Siegel-Tukey test # and quantify under various conditions the power of each and the correlation # among them. Write it up, and send it to a statistics journal. :-) # # ----------------------------------------------------------------------------------