#!/usr/bin/env python
# ----------------------------------------------------------------------------------- #
#
# Python script for testing Raphael Weldon's dice.
# ----------------------------------------------------------------------------------- #
#
# Walter Frank Raphael Weldon DSc FRS (15 March 1860 - 13 April 1906),
# generally called Raphael Weldon, was an English evolutionary
# biologist and a founder of biometry. He was the joint founding
# editor of the journal Biometrika, with Francis Galton and Karl Pearson.
#
# Weldon and his two colleagues were interested in statistics
# (to say the least!), and in order to do a simple hypothesis test,
# Weldon rolled 12 dice 26306 times (i.e. 315672 throws), which he
# wrote about in a letter to Galton dated the 2nd of February 1894
# (which is how we have this data).
#
# There were actually four data sets, as follows:
#
# I: 12 dice were rolled 26306 times, and the number of 5s and 6s was
#    counted, with the following result:
#       0     1     2     3     4     5     6     7     8     9    10    11    12
#     185  1149  3265  5475  6114  5194  3067  1331   403   105    14     4     0
#
# II: A subset of 7006 of the 26306 experiments was performed by a clerk, deemed
#     by Galton to be "reliable and accurate", yielding:
#       0     1     2     3     4     5     6     7     8     9    10    11    12
#      45   327   886  1475  1571  1404   787   367   112    29     2     1     0
#
# III: In this data set, 4096 of the rolls were scrutinized,
#      with only a 6 counting as a success:
#       0     1     2     3     4     5     6     7     8     9    10    11    12
#     447  1145  1181   796   380   115    24     8     0     0     0     0     0
#
# IV: Finally, a data set of 4096 rolls was considered, counting 4s, 5s
#     and 6s as successes:
#       0     1     2     3     4     5     6     7     8     9    10    11    12
#       0     7    60   198   430   731   948   847   536   257    71    11     0
#
# We will only consider the first (and largest) data set, and test the
# hypothesis that p(5,6) = 1/3 against alternative hypotheses.
#
# References:
#   Kemp, A.W. and Kemp, C.D. (1991), "Weldon's dice data revisited",
#   American Statistician, 45, 216-222.
#
# ----------------------------------------------------------------------------------- #
#
# Author: Troels C. Petersen
# Email:  petersen@nbi.dk
# Date:   14th of December 2017
#
# ----------------------------------------------------------------------------------- #

from __future__ import division, print_function
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats, special          # special.binom (binomial coefficient)
from iminuit import Minuit
from probfit import Chi2Regression

plt.close('all')
r = np.random
r.seed(42)

# Settings for script:
verbose   = True
SavePlots = False
Ndices    = 12
Noutcome  = Ndices + 1

# ------------------------------------------------------------------ #
# Data:
# ------------------------------------------------------------------ #

data1 = np.array([185, 1149, 3265, 5475, 6114, 5194, 3067, 1331, 403, 105, 14,  4, 0])
data2 = np.array([ 45,  327,  886, 1475, 1571, 1404,  787,  367, 112,  29,  2,  1, 0])
data3 = np.array([447, 1145, 1181,  796,  380,  115,   24,    8,   0,   0,  0,  0, 0])
data4 = np.array([  0,    7,   60,  198,  430,  731,  948,  847, 536, 257, 71, 11, 0])
sum1 = np.sum(data1)

if (verbose) :
    print("   N   data1  data2  data3  data4")
    for i in range (Noutcome) :
        print("  {:2d}:  {:5d}  {:5d}  {:5d}  {:5d}".format(i, data1[i], data2[i], data3[i], data4[i]))

# ------------------------------------------------------------------ #
# Testing simple hypothesis (p_naive = 1/3):
# ------------------------------------------------------------------ #

# Each bin is a (Poisson-distributed) count, so its uncertainty is the
# square root of the observed number of entries:
data1_error = np.sqrt(data1)
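# A small cross-check sketch of the uncertainty choice above: each bin count is
# Binomial(sum1, q_i), with q_i the probability of i successes among 12 dice, so
# its standard deviation sqrt(sum1*q_i*(1-q_i)) should be close to the Poisson
# sqrt(count) used here (scipy.stats.binom.pmf is assumed for q_i):
q_naive     = stats.binom.pmf(np.arange(Noutcome), Ndices, 1.0/3.0)
sigma_binom = np.sqrt(sum1 * q_naive * (1.0 - q_naive))
# print(np.column_stack((np.sqrt(data1), sigma_binom)))   # Compare the two, if curious.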
data1_expected = np.zeros(Noutcome)
data1_expected_error = np.zeros_like(data1_expected)

# Make factorial array:
factorial = [0]*Noutcome
factorial[0] = 1.0
for i in range (1, Noutcome) :
    factorial[i] = factorial[i-1] * i

# Make prediction with "naive" (i.e. p(5 or 6) = 1/3) guess:
p_naive = 1.0 / 3.0
chi2 = 0.0
Ndof = 0
print(" - - - - - - - - - - - - - - - - - - - - - - - - - - - -")
print("   N      data           naive pred (p=1/3)    dChi2")
for i in range (0, Noutcome):
    # Binomial probability of i successes out of 12, and the corresponding expectation:
    p_bin = factorial[Ndices] / (factorial[i] * factorial[Ndices-i]) * p_naive**i * (1.0-p_naive)**(Ndices-i)
    data1_expected[i] = sum1 * p_bin
    data1_expected_error[i] = np.sqrt(sum1 * p_bin * (1.0 - p_bin))

    # Calculate Chi2, using Pearson's "original" method (expected count in the
    # denominator), as the range is well defined and all expected counts are positive:
    dchi2 = (data1[i] - data1_expected[i])**2 / data1_expected[i]
    chi2 += dchi2
    Ndof += 1          # Simple hypothesis (nothing fitted), so each bin adds one dof.

    if (verbose):
        print("  {:2d}: {:7.1f} +- {:5.1f}   vs.  {:7.1f}      {:6.2f}".format(i, data1[i], np.sqrt(data1[i]), data1_expected[i], dchi2))

# Plot histogram with data and prediction:
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_xlabel("Number of 5's and 6's")
ax.set_ylabel("Frequency")
xvals = np.arange(0, Noutcome)
ax.errorbar(xvals, data1, data1_error, fmt='r_', label='data1')
ax.plot(xvals, data1_expected, 'b_', label='naive prediction (p=1/3)')
ax.legend(loc='best')
fig.tight_layout()
plt.show(block=False)
if (SavePlots) :
    fig.savefig("WeldonDices_NaiveHypothesis.pdf", dpi=600)

pvalue_naive = stats.chi2.sf(chi2, Ndof)
print("  ChiSquare between data and prediction: {:7.2f}".format(chi2))
print("  Probability of Chi2: P({:5.2f}, {:2d}) = {:8.6f}".format(chi2, Ndof, pvalue_naive))
print()

# Optional: check with a KS-test whether data and prediction are alike!
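# A minimal sketch of the Chi2 scan over p(5,6) asked about in question 5 below,
# assuming a simple grid scan (scipy.stats.binom.pmf supplies the binomial
# probabilities, and the Chi2 difference is treated as a likelihood ratio a la Wilks):
def chi2_of_p(p) :
    expected = sum1 * stats.binom.pmf(np.arange(Noutcome), Ndices, p)
    return np.sum((data1 - expected)**2 / expected)

p_scan    = np.linspace(0.32, 0.35, 301)
chi2_scan = np.array([chi2_of_p(p) for p in p_scan])
p_best    = p_scan[np.argmin(chi2_scan)]
D         = chi2_of_p(p_naive) - np.min(chi2_scan)
print("  Scan sketch:  best p(5,6) = {:6.4f},  D = Chi2(1/3) - Chi2(best) = {:5.2f}".format(p_best, D))
print("  Wilks (Ndof=1): P(naive vs. free p) = {:8.6f}".format(stats.chi2.sf(D, 1)))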
# Finally, ensure that the program does not terminate (and the plot disappear),
# before you press enter:
try:
    input_func = raw_input       # Python 2
except NameError:
    input_func = input           # Python 3

try:
    __IPYTHON__
except NameError:
    input_func('Press Enter to exit')

# ----------------------------------------------------------------------------------- #
#
# Questions:
# ----------
# 0) Just a warmup (1 min): What distribution should the number of 5s and 6s follow? And
#    what uncertainty would you assign to each bin?
#
# 1) Consider for now just the first (and largest) data set. Plot the data along with the
#    "naive" p(5,6) = 1/3 prediction, and calculate the Chi2. What is the probability of
#    this Chi2, given the number of degrees of freedom? Is p(5,6) = 1/3 very likely?
#
#    Note: Start by expanding the printed table to include the naive prediction and the
#    (Pearson) Chi2 contribution for each N (and remove data2-data4 from the table!).
#
# 2) Try - as a healthy illustration - to fit the data with a Poisson distribution, just
#    to see whether the approximation "N=12 is large and p=1/3 is small" holds for this
#    amount of data.
#
# 3) Next, consider an alternative hypothesis of a "non-naive" value of the probability
#    (perhaps imagine that someone sat and evaluated the effect of the "holes" that the
#    5 and 6 make, and the impact on the weight distribution): p(5,6) = 0.337. What is
#    the ChiSquare probability of this value?
#
# 4) Do a likelihood ratio (LR) test between the two above hypotheses, and determine which
#    one is better (a minimal sketch of such a test is appended at the end of this file).
#    In order to know HOW much better the model is, one would have to make a very delicate
#    calculation (I'm discussing this with a professor of statistics!) or do a significant
#    amount of simulation. A rough rule of thumb is to take the square root of D, but that
#    is just an unjustified rough rule.
#
# 5) Next, consider yet another alternative hypothesis, which allows p(5,6) to have ANY
#    value, i.e. a non-simple hypothesis. Find the most probable p(5,6) by scanning
#    through the possible values and calculating the Chi2 and LH for each of them (the
#    Chi2 scan sketch above, after the KS-test note, illustrates this). From D and the
#    ChiSquare distribution for Ndof=1, get the probability of the naive hypothesis
#    (p = 1/3) vs. this alternative hypothesis.
#    Fitting (the minimum of) these LH and Chi2 distributions, what is the uncertainty
#    on this probability? And how probable is the Chi2 of this new hypothesis?
#
#
# Advanced questions:
# -------------------
# 1) Do the same exercise for the other three data sets. Do they all show the same
#    behavior, and is the large statistics needed?
#
# 2) Consider data sets 1, 3 and 4, and realise that from these three sets you can
#    determine p(4), p(5) and p(6) separately. Do so, and ask yourself if there is
#    some (statistically significant) logical ordering of these three numbers which,
#    for example, suggests that it is the weight of the opposite number of eyes that
#    produces the slight differences from the naive p(any number) = 1/3.
#
# ----------------------------------------------------------------------------------- #
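# A minimal sketch of the likelihood ratio test from question 4, assuming the
# multinomial log-likelihood evaluated up to a p-independent constant with
# scipy.stats.binom.logpmf, and p(5,6) = 0.337 as the alternative from question 3
# (note that, standing after the exit prompt, this block only runs once Enter is hit):
def lnL_of_p(p) :
    return np.sum(data1 * stats.binom.logpmf(np.arange(Noutcome), Ndices, p))

D_LR = -2.0 * (lnL_of_p(1.0/3.0) - lnL_of_p(0.337))   # Positive if p=0.337 fits better.
print("  LR sketch: -2 ln(LR) between p=1/3 and p=0.337 is {:6.2f}".format(D_LR))
# As noted in question 4, these are two simple (non-nested) hypotheses, so D_LR does
# not follow a Chi2 distribution; calibrating it properly requires simulation.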