#!/usr/bin/env python
# ----------------------------------------------------------------------------------- #
#
# Python program for analysing measurements of the length of the lecture table in
# Auditorium A at NBI.
#
# There are two measurements each with estimated error of the table length:
#   1: Measurement with a 30cm ruler.
#   2: Measurement with a 2m folding ruler.
#
# Each person was asked not only to state the measurement, but also their estimated
# uncertainty. None of the persons could see others measurements in order to get the
# largest degree of independence. Also, the 30cm ruler measurement was asked to be
# done first.
#
#   Author: Troels C. Petersen (NBI)
#   Email:  petersen@nbi.dk
#   Date:   10th of December 2018
#
# ----------------------------------------------------------------------------------- #
#
# EXAMPLE of analysis to yield SOLUTION!!!
# ----------------------------------------------------------------------------------- #

from ROOT import *
import os, math

# Set the showing of statistics and fitting results (0 means off, 1 means on):
gStyle.SetOptStat("emr");    # See http://root.cern.ch/root/html/TStyle.html#TStyle:SetOptStat
gStyle.SetOptFit(1111);      # See http://root.cern.ch/root/html/TStyle.html#TStyle:SetOptFit

# Options of program (switches steering the analysis below):
Verbose = True                    # Print every measurement read and every rejection made.
Blinded = False                   # Add a hidden random offset to all quoted means.
Correct30cmOff = True             # Shift 30cm-ruler measurements that are off by +-30cm.
Correct2mOff = True               # Shift 2m-rule measurements that are off by +-2m.
ApplyChauvenetsCriterion = True   # Use Chauvenet's Criterion (else a simple Nsigma cut).
SavePlots = False                 # Write the canvases to PDF files.

# Recorded results (for reference, obtained with the settings above):
#
# Naive results:
#   30cm: Naive Mean = 3.3290 +- 0.0098   RMS = 0.2172  (N = 495)
#   2m:   Naive Mean = 3.3320 +- 0.0112   RMS = 0.2480  (N = 493)
#   Correspondence between raw 30cm and 2m measurement: Diff = -0.00297 +- 0.01484  (-0.20 sigma)
#
# FINAL: With correction (p_cut = 0.05 in Chauvenet's Criterion):
#   30cm: Unweighted Mean = 3.36120 +- 0.00092   RMS = 0.01960  (N = 454)
#   2m:   Unweighted Mean = 3.36295 +- 0.00026   RMS = 0.00542  (N = 430)
#   Improvement in error from naive to selective, unweighted - 30cm: 10.6
#   Improvement in error from naive to selective, unweighted - 2m:   42.7
#   Correspondence between unweighted 30cm and 2m measurement: Diff = -0.00175 +- 0.00130  (-1.35 sigma)
#   30cm: Weighted Mean = 3.36376 +- 0.00030   p(Chi2=460.5, Ndof=442) = 0.2622  (N = 443)
#   2m:   Weighted Mean = 3.36385 +- 0.00014   p(Chi2=451.1, Ndof=431) = 0.2426  (N = 432)
#   Improvement in error from selected unweighted to weighted - 30cm: 3.1
#   Improvement in error from selected unweighted to weighted - 2m:   1.8
#   Correspondence between weighted 30cm and 2m measurement: Diff = -0.00009 +- 0.00033  (-0.27 sigma)
#
#   Correspondence between unweighted and weighted 30cm measurement: Diff = 0.00257 +- 0.00097  (2.65 sigma)
#   Correspondence between unweighted and weighted 2m measurement:   Diff = 0.00090 +- 0.00030  (3.04 sigma)
#
# Fitting results:
#   Single Gaussian fit:                L = 3.36332 +- 0.00058 m
#   Double Gaussian fit:                L = 3.36418 +- 0.00041 m
#   3 x Double Gaussian:                L = 3.36337 +- 0.00053 m
#   3 x Triple Gaussian (outlier) fit:  L = 3.36428 +- 0.00042 m
#
# OTHER settings:
# With correction (p_cut = 0.01 in Chauvenet's Criterion):
#   30cm: Unweighted Mean = 3.36320 +- 0.00128   RMS = 0.02791  (N = 476)
#   2m:   Unweighted Mean = 3.36279 +- 0.00028   RMS = 0.00590  (N = 435)
#   The number of rejected 30cm measurements was: 19
#   The number of rejected 2m measurements was:   58
#   30cm: Weighted Mean = 3.36444 +- 0.00025   p(Chi2=446.1, Ndof=443) = 0.4497  (N = 444)
#   2m:   Weighted Mean = 3.36385 +- 0.00014   p(Chi2=451.1, Ndof=431) = 0.2426  (N = 432)
#
# With correction (p_cut = 0.30 in Chauvenet's Criterion):
#   30cm: Unweighted Mean = 3.36319 +- 0.00060   RMS = 0.01225  (N = 421)
#   2m:   Unweighted Mean = 3.36279 +- 0.00028   RMS = 0.00590  (N = 435)
#   The number of rejected 30cm measurements was: 74
#   The number of rejected 2m measurements was:   58
#   30cm: Weighted Mean = 3.36444 +- 0.00025   p(Chi2=446.1, Ndof=443) = 0.4497  (N = 444)
#   2m:   Weighted Mean = 3.36385 +- 0.00014   p(Chi2=451.1, Ndof=431) = 0.2426  (N = 432)
#
# No correction (p_cut = 0.30 in Chauvenet's Criterion):
#   30cm: Unweighted Mean = 3.36257 +- 0.00057   RMS = 0.01089  (N = 368)
#   2m:   Unweighted Mean = 3.36272 +- 0.00028   RMS = 0.00588  (N = 430)
#   The number of rejected 30cm measurements was: 127
#   The number of rejected 2m measurements was:    63
#   30cm: Weighted Mean = 3.36388 +- 0.00031   p(Chi2=516.2, Ndof=403) = 0.0001  (N = 404)
#   2m:   Weighted Mean = 3.36371 +- 0.00015   p(Chi2=431.9, Ndof=427) = 0.4251  (N = 428)
# ----------------------------------------------------------------------------------- #

def sqr(a) :
    """Return the square of a."""
    return a*a


# ----------------------------------------------------------------------------------- #
# Calculate mean and RMS from a list:
# ----------------------------------------------------------------------------------- #
def MeanRMS( inlist ) :
    """Return [mean, rms] of the numbers in inlist.

    The RMS uses the unbiased N-1 denominator. Error cases (not a list, empty
    list, single entry) print a warning and return the sentinel value
    -9999999999.9 for any quantity that could not be computed (original interface).
    """
    if not isinstance(inlist, list) :
        print("Error: Input to MeanRMS is NOT a list!")
        return [-9999999999.9, -99999999999.9]
    if len( inlist ) == 0 :
        print("Error: Input list is empty! No calculation of mean and RMS possible.")
        return [-9999999999.9, -99999999999.9]
    elif len( inlist ) == 1 :
        print("Error: Input list has length one! No calculation of RMS possible.")
        return [inlist[0], -99999999999.9]

    # Mean:
    mu = sum(inlist) / float(len(inlist))

    # Standard deviation (RMS), with the unbiased N-1 denominator:
    rms = math.sqrt( sum((entry-mu)*(entry-mu) for entry in inlist) / float(len(inlist) - 1) )

    return [mu, rms]       # Return a list with two values: [mean, rms]


# ----------------------------------------------------------------------------------- #
# Apply Chauvenet's Criterion to a list and return a (possibly) reduced list:
# ----------------------------------------------------------------------------------- #
# Note: Peirce's criterion (though not documented there) has a Python implementation at:
#       https://en.wikipedia.org/wiki/Peirce%27s_criterion
def ChauvenetsCriterion( inlist, pmin = 0.1) :
    """Iteratively reject the furthest outlier from inlist.

    At each iteration the measurement furthest from the current mean is found,
    and the probability of observing at least one point that far out among
    N Gaussian measurements is computed. If that probability is below pmin the
    point is removed and mean/RMS recomputed; otherwise iteration stops.
    Returns the reduced list; the input list is never modified.
    """
    if not isinstance(inlist, list) :
        print("Error: Input to ChauvenetsCriterion is NOT a list!")
        return []       # FIX: return an (empty) list of measurements, not [mean, rms]-style sentinels.

    outlist = list(inlist)       # Work on a copy - the caller's list stays intact.
    meanrms = MeanRMS(outlist)   # Calculation of initial mean and rms.

    # Iterate until the furthest outlier is probable enough (p_any_outliers > pmin):
    while True :

        # Find the furthest outlier, i.e. most distant measurement from the mean (least probable), and its index:
        ifurthest = 0
        dLfurthest = 0.0
        for number, entry in enumerate( outlist ) :
            if (abs(entry - meanrms[0]) > dLfurthest) :
                ifurthest = number                      # Note the index, so that this entry can later be removed!
                dLfurthest = abs(entry - meanrms[0])

        # Probability of any such outlier (taking into account that there are many measurements!):
        Nsigma_outlier = dLfurthest / meanrms[1]
        p_this_outlier = math.erfc(Nsigma_outlier / math.sqrt(2.0)) / 2.0   # One-sided Gaussian tail (same as TMath.Erfc/2).
        p_any_outliers = 1.0 - (1.0 - p_this_outlier)**int(len(outlist))

        status = "  %3d:  L=%5.3f  dL=%5.3f  Nsig=%5.2f  p1=%10.8f,  p_all=%10.8f >? pmin=%5.3f   N=%3d  mean=%6.4f  rms=%6.4f"%(ifurthest, outlist[ifurthest], dLfurthest, Nsigma_outlier, p_this_outlier, p_any_outliers, pmin, len(outlist), meanrms[0], meanrms[1])

        # Key line: If the furthest outlier is probable enough, then stop rejecting points:
        if (p_any_outliers > pmin) :
            if (Verbose) :
                print(status + "  -> Accepted")
            break

        # Remove the furthest point from the list of accepted measurements (if any are left!),
        # recalculate mean and RMS, and reiterate:
        if (len(outlist) > 1) :
            if (Verbose) :
                print(status + "  -> Rejected")
            outlist.pop(ifurthest)
            meanrms = MeanRMS(outlist)
        else :
            print("\n  ERROR: All measurements have been rejected!")
break return outlist # ----------------------------------------------------------------------------------- # # Initial data analysis - look at the data and get "naive" values: # ----------------------------------------------------------------------------------- # # Define two histograms with all the lengths recorded: Hist_L30cm = TH1F("Hist_L30cm", "Lengths estimates by 30cm ruler", 1000, 0.0, 5.0); Hist_L2m = TH1F("Hist_L2m", "Lengths estimates by 2m folding rule", 5000, 0.0, 5.0); Lblind = 0.0 r = TRandom3() r.SetSeed(1) if (Blinded) : Lblind = r.Gaus(0.0, 0.1) # I add a constant (Gaussian with +-10cm) to remain "blind" # Define lists to contain the measurements, such that you can easily loop over them later! L30cm = [] eL30cm = [] L2m = [] eL2m = [] Nread = 0 # Number of measurements read in total infiles = ["data_TableMeasurements2009.txt", "data_TableMeasurements2010.txt", "data_TableMeasurements2011.txt", "data_TableMeasurements2012.txt", "data_TableMeasurements2013.txt", "data_TableMeasurements2014.txt", "data_TableMeasurements2015.txt", "data_TableMeasurements2016.txt", "data_TableMeasurements2017.txt", "data_TableMeasurements2018.txt"] # Loop over files and open them for infile in infiles : with open( infile, 'r' ) as f : # Read and ignore header lines: header1 = f.readline() header2 = f.readline() # Loop over lines and extract variables of interest for line in f: line = line.strip() columns = line.split() if (len(columns) == 4) : # Put the values read into a list: L30cm.append(float(columns[0])) eL30cm.append(float(columns[1])) L2m.append(float(columns[2])) eL2m.append(float(columns[3])) # Histograms to show the distribution of ALL measurements (i.e. 
0-5 meters): Hist_L30cm.Fill(L30cm[-1]) Hist_L2m.Fill(L2m[-1]) # Count number of lines read, and print result for visual inspection: Nread += 1 if (Verbose) : print " Measurement %3d: L(30cmR): %6.3f +- %6.3f m L(2mFR): %6.3f +- %6.3f m"%(Nread, L30cm[-1], eL30cm[-1], L2m[-1], eL2m[-1]) f.close() # ----------------------------------------------------------------------------------- # # Plot histograms on the screen in one canvas and fit with Gaussians: # ----------------------------------------------------------------------------------- # canvas = TCanvas( "canvas", "canvas", 50, 50, 1200, 600 ) canvas.Divide(1,2) minL = 0.0 maxL = 5.0 # Set all the trimmings for the histogram plotting: canvas.cd(1) Hist_L30cm.GetXaxis().SetRangeUser(minL, maxL) # From the histogram, we get the x-axis, and set the range. Hist_L30cm.GetXaxis().SetTitle("Table length (m)") # We give the x-axis a label. Hist_L30cm.GetYaxis().SetTitle("Frequency") # We give the y-axis a label. Hist_L30cm.SetLineColor(kBlue) # Set the histogram line color to blue. Hist_L30cm.SetLineWidth(2) # Set the histogram line width to two. # Define a resonable fitting function: fit_L30cm = TF1("fit_L30cm", "gaus", 3.32, 3.40) # Define a (predefined) Gaussian fitting function. fit_L30cm.SetParameters(50.0, 3.36, 0.008) # Set the starting parameters to reasonable values. fit_L30cm.SetLineColor(kRed) # Set the line color to red. fit_L30cm.SetLineWidth(2) # Set tne line width to 2. Hist_L30cm.Fit("fit_L30cm", "QR") # Note the two options: Q for Quiet and R for Range. Hist_L30cm.Draw() # Draw the histogram canvas.cd(2) Hist_L2m.GetXaxis().SetRangeUser(minL, maxL) # From the histogram, we get the x-axis, and set the range. Hist_L2m.GetXaxis().SetTitle("Table length (m)") # We give the x-axis a label. Hist_L2m.GetYaxis().SetTitle("Frequency") # We give the y-axis a label. Hist_L2m.SetLineColor(kBlue) # Set the histogram line color to blue. Hist_L2m.SetLineWidth(2) # Set the histogram line width to two. 
# Define a resonable fitting function: fit_L2m = TF1("fit_L2m", "gaus", 3.34, 3.38) # Define a (predefined) Gaussian fitting function. fit_L2m.SetParameters(50.0, 3.36, 0.0025) # Set the starting parameters to reasonable values. fit_L2m.SetLineColor(kRed) # Set the line color to red. fit_L2m.SetLineWidth(2) # Set tne line width to 2. Hist_L2m.Fit("fit_L2m", "QR") # Note the two options: Q for Quiet and R for Range. Hist_L2m.Draw() canvas.Update() if (SavePlots) : canvas.SaveAs("TableMeasurements_RawFitted.pdf") # Save plot (format follow extension name) # Compute approximate width of central peak, to get an estimate of the uncertainty on (good) measurements: mean30_peakfit = fit_L30cm.GetParameter(1) rms30_peakfit = fit_L30cm.GetParameter(2) mean2m_peakfit = fit_L2m.GetParameter(1) rms2m_peakfit = fit_L2m.GetParameter(2) # ----------------------------------------------------------------------------------- # # Solution note: # -------------- # These fits show, that the true value lies around the range 3.362-3.364, and that the # uncertainty of "a typical but correct" measurement is about 8mm for the 30cm ruler, # and 3.0mm for the 2m folding rule. # # In the 30cm case, I note that there are also a significant amount of measurements # +-30cm from this range. This is clearly measurements, where the number of 30cm rulers # has been off by one. As this is an understood problem, I may therefore decide to also # include these measurements, but this depends on these contributing to the overall # precision. # # In the 2m case, there are three measurements which are 2m short, and these could of # course also be included. 
# ----------------------------------------------------------------------------------- # # ------------------------------------------------------------------------- # Make a list with all VALID measurements and compute "naive" mean and RMS: # ------------------------------------------------------------------------- # I define "valid" as in the range 0.0m < measurement < 5.0m: L30cm_valid = [] for entry in L30cm : if (0.0 < entry < 5.0) : L30cm_valid.append(entry) else : print " The 30cm entry L = %6.3f was not considered valid!"%(entry) meanrms30 = MeanRMS(L30cm_valid) mean30 = meanrms30[0] rms30 = meanrms30[1] emean30 = meanrms30[1] / sqrt(len(L30cm_valid)) L2m_valid = [] for entry in L2m : if (0.0 < entry < 5.0) : L2m_valid.append(entry) else : print " The 2m entry L = %6.3f was not considered valid!"%(entry) meanrms2m = MeanRMS(L2m_valid) mean2m = meanrms2m[0] rms2m = meanrms2m[1] emean2m = meanrms2m[1] / sqrt(len(L2m_valid)) # This is the "naive" mean, in that it simply uses all measurements without any inspection. print "\n --------------------------- NAIVE/RAW results ---------------------------------" print " 30cm: Naive Mean = %6.4f +- %6.4f RMS = %6.4f (N = %2d)"%(mean30+Lblind, emean30, rms30, int(len(L30cm_valid))) print " 2m: Naive Mean = %6.4f +- %6.4f RMS = %6.4f (N = %2d)"%(mean2m+Lblind, emean2m, rms2m, int(len(L2m_valid))) # Also compute the difference between the 30cm and 2m separately combined measurements: diff_raw = mean30 - mean2m ediff_raw = sqrt(sqr(emean30) + sqr(emean2m)) nsig_raw = diff_raw / ediff_raw print " Correspondence between raw 30cm and 2m measurement: Diff = %7.5f +- %7.5f (%4.2f sigma)"%(diff_raw, ediff_raw, nsig_raw) # ----------------------------------------------------------------------------------- # # Apply correction to measurements (by +-30cm and/or +-2m), if I choose to do so. 
# For correction, use means found above, and simply shift everything 15-45cm away # by 30cm, leaving it to subsequent analysis, if point is to be included or not. Ncorr30cm = 0 Ncorr2m = 0 L30cm_corrected = [] for entry in L30cm_valid : if (Correct30cmOff and mean30-0.45 < entry < mean30-0.15) : L30cm_corrected.append(entry+0.30) Ncorr30cm += 1 elif (Correct30cmOff and mean30+0.15 < entry < mean30+0.45) : L30cm_corrected.append(entry-0.30) Ncorr30cm += 1 else : L30cm_corrected.append(entry) L2m_corrected = [] for entry in L2m_valid : if (Correct2mOff and mean2m-3.0 < entry < mean2m-1.0) : L2m_corrected.append(entry+2.0) Ncorr2m += 1 elif (Correct2mOff and mean2m+3.0 < entry < mean2m+1.0) : L2m_corrected.append(entry-2.0) Ncorr2m += 1 else : L2m_corrected.append(entry) # Print the number of corrected measurements: print "\n Number of CORRECTED measurements:" print " 30cm: Ncorr = %3d"%(Ncorr30cm) print " 3m: Ncorr = %3d"%(Ncorr2m) # ----------------------------------------------------------------------------------- # # Apply Chauvenet's Criterion or simply only accept measurements around some interval: # ----------------------------------------------------------------------------------- # # Define a list of accepted measurements for unweighted mean, following either Chauvenet's Criterion # or a simpler selection (IMPORTANT NOTE: ALWAYS PRINT REJECTED VALUES TO KEEP CONTROL!): # First define lists (empty for now) of accepted measurements for the unweighted mean: L30cm_accepted_uw = [] L2m_accepted_uw = [] if (Verbose) : print "\n The following measurements are deemed poor and removed by..." if (ApplyChauvenetsCriterion) : print "\n --------------------------- Chauvenet's Criterion ---------------------------------" # NOTE: The last number is the cut on p-value at which we accept the outermost point. 
if (Verbose) : print "Rejected 30cm measurements:" L30cm_accepted_uw = ChauvenetsCriterion( L30cm_corrected, 0.05) # L30cm_accepted_uw = ChauvenetsCriterion( L30cm_corrected, 0.3) # To be used if no corrections are done! if (Verbose) : print "Rejected 2m measurements:" L2m_accepted_uw = ChauvenetsCriterion( L2m_corrected, 0.05) # ----------------------------------------------------------------------------------- # # Solution note: # -------------- # Applying Chauvenet's Criterion for the 30cm ruler measurements, has the "problem", that # it is not very good at dealing with the two peaks at +-30cm! If these are to be removed, # then a lot of reasonably good measurements will also be removed. # The solution is to apply the correction to data, i.e. deem all measurements around +-30cm # away from the peak as clearly a result of miscounting, and adjust them by 30cm. This is # done in the interval (15cm-45cm), which may seem excessive, but if a measurement is 20cm # away, it will in the corrected version still be 10cm away... thus still a poor measurement! # # However, the choice of correcting or not, and what value of the probability to choose for # discarding measurements is by no means given, and the result of trying various approaches, # which is why one should certainly MAKE SURE that one investigates how much the final # result depends on these choices (which are then a systematic uncertainty!). # ----------------------------------------------------------------------------------- # else : # Cut at a "reasonable" value, i.e. one that would not discart many/any points, if the # distribution was nicely Gaussian. Here, I take that to be 4 sigma, which has a probability # of p = 0.000063 (two-sided), thus with 286 numbers, the chance of one being outside is # roughly p*N = 0.018, or about 2%. So I would rarely reject a good measurement. I obtain the # width (i.e. estimated uncertainty on the individual measurements) by fitting the central peak # by a Gaussian. 
Nsigma_cut = 4.0 print " --------------------------- Nsigma=%4.2f cut away from core peak ---------------------------"%(Nsigma_cut) # 30cm case: L30cm_accepted_uw = [] Lmin = mean30_peakfit - Nsigma_cut*rms30_peakfit Lmax = mean30_peakfit + Nsigma_cut*rms30_peakfit print " 30cm: Given: mean = %6.4f, rms = %6.4f, the allowed range is: %6.4f - %6.4f"%(mean30_peakfit, rms30_peakfit, Lmin, Lmax) for number, entry in enumerate( L30cm_corrected ) : Nsigma = (entry - mean30_peakfit) / rms30_peakfit if (abs(Nsigma) < Nsigma_cut) : L30cm_accepted_uw.append(entry) else : if (Verbose) : print " %3d: %5.3f Nsig=%6.2f N=%3d -> Rejected"%(number, abs(entry - mean30_peakfit), Nsigma, len(L30cm_accepted_uw)) # 2m case: L2m_accepted_uw = [] Lmin = mean2m_peakfit - Nsigma_cut*rms2m_peakfit Lmax = mean2m_peakfit + Nsigma_cut*rms2m_peakfit print " 2m: Given: mean = %6.4f, rms = %6.4f, the allowed range is: %6.4f - %6.4f"%(mean2m_peakfit, rms2m_peakfit, Lmin, Lmax) for number, entry in enumerate( L2m_corrected ) : Nsigma = (entry - mean2m_peakfit) / rms2m_peakfit if (abs(Nsigma) < Nsigma_cut) : L2m_accepted_uw.append(entry) else : if (Verbose) : print " %3d: %5.3f Nsig=%6.2f N=%3d -> Rejected"%(number, abs(entry - mean2m_peakfit), Nsigma, len(L2m_accepted_uw)) # At the very least note, how many points were rejected: print "The number of rejected 30cm measurements was: %3d"%(len(L30cm_corrected) - len(L30cm_accepted_uw)) print "The number of rejected 2m measurements was: %3d"%(len(L2m_corrected) - len(L2m_accepted_uw)) # ----------------------------------------------------------------------------------- # # Plot the distribution of the remaining (accepted) measurements: # ----------------------------------------------------------------------------------- # HistAcc_L30cm = TH1F("HistAcc_L30cm", "Accepted lengths estimates by 30cm ruler", 100, 3.0, 3.5); HistAcc_L2m = TH1F("HistAcc_L2m", "Accepted lengths estimates by 2m folding rule", 200, 3.0, 3.5); for entry in L30cm_accepted_uw : 
HistAcc_L30cm.Fill(entry) for entry in L2m_accepted_uw : HistAcc_L2m.Fill(entry) canvas2 = TCanvas( "canvas2", "canvas2", 100, 100, 1200, 600 ) canvas2.Divide(1,2) minL = 3.3 maxL = 3.5 canvas2.cd(1) canvas2.SetLogy() HistAcc_L30cm.GetXaxis().SetRangeUser(minL, maxL) # From the histogram, we get the x-axis, and set the range. HistAcc_L30cm.GetXaxis().SetTitle("Table length (m)") # We give the x-axis a label. HistAcc_L30cm.GetYaxis().SetTitle("Frequency") # We give the y-axis a label. HistAcc_L30cm.SetLineColor(kBlue) # Set the histogram line color to blue. HistAcc_L30cm.SetLineWidth(2) # Set the histogram line width to two. # Define a resonable fitting function: fitAcc_L30cm = TF1("fitAcc_L30cm", "gaus", 3.33, 3.39)# Define a (predefined) Gaussian fitting function. fitAcc_L30cm.SetLineColor(kRed) # Set the line color to red. fitAcc_L30cm.SetLineWidth(2) # Set tne line width to 2. HistAcc_L30cm.Fit("fitAcc_L30cm", "QRL") # Note the two options: Q for Quiet and R for Range. HistAcc_L30cm.Draw() # Draw the histogram canvas2.cd(2) canvas2.SetLogy() HistAcc_L2m.GetXaxis().SetRangeUser(minL, maxL) # From the histogram, we get the x-axis, and set the range. HistAcc_L2m.GetXaxis().SetTitle("Table length (m)") # We give the x-axis a label. HistAcc_L2m.GetYaxis().SetTitle("Frequency") # We give the y-axis a label. HistAcc_L2m.SetLineColor(kBlue) # Set the histogram line color to blue. HistAcc_L2m.SetLineWidth(2) # Set the histogram line width to two. # Define a resonable fitting function: fitAcc_L2m = TF1("fitAcc_L2m", "gaus", 3.34, 3.38) # Define a (predefined) Gaussian fitting function. fitAcc_L2m.SetLineColor(kRed) # Set the line color to red. fitAcc_L2m.SetLineWidth(2) # Set tne line width to 2. HistAcc_L2m.Fit("fitAcc_L2m", "QRL") # Note the two options: Q for Quiet and R for Range. 
HistAcc_L2m.Draw() canvas2.Update() if (SavePlots) : canvas2.SaveAs("TableMeasurements_Accepted.pdf") # ----------------------------------------------------------------------------------- # # Data analysis (mostly done!) and results for unweighted result: # ----------------------------------------------------------------------------------- # print "\n --------------------------- UNWEIGHTED results ---------------------------------" # Get the mean, RMS and error on mean from the list of accepted measurements - 30cm: meanrms30 = MeanRMS(L30cm_accepted_uw) mean_uw30 = meanrms30[0] rms_uw30 = meanrms30[1] emean_uw30 = meanrms30[1] / sqrt(len(L30cm_accepted_uw)) print " 30cm: Unweighted Mean = %7.5f +- %7.5f RMS = %7.5f (N = %3d)"%(mean_uw30+Lblind, emean_uw30, rms_uw30, len(L30cm_accepted_uw)) # Get the mean, RMS and error on mean from the list of accepted measurements - 2m: meanrms2m = MeanRMS(L2m_accepted_uw) mean_uw2m = meanrms2m[0] rms_uw2m = meanrms2m[1] emean_uw2m = meanrms2m[1] / sqrt(len(L2m_accepted_uw)) print " 2m: Unweighted Mean = %7.5f +- %7.5f RMS = %7.5f (N = %3d)"%(mean_uw2m+Lblind, emean_uw2m, rms_uw2m, len(L2m_accepted_uw)) print " Improvement in error from naive to selective, unweighted - 30cm: %4.1f"%(emean30 / emean_uw30) print " Improvement in error from naive to selective, unweighted - 2m: %4.1f"%(emean2m / emean_uw2m) diff_uw = mean_uw30 - mean_uw2m ediff_uw = sqrt(sqr(emean_uw30) + sqr(emean_uw30)) nsig_uw = diff_uw / ediff_uw print " Correspondence between unweighted 30cm and 2m measurement: Diff = %7.5f +- %7.5f (%4.2f sigma)"%(diff_uw, ediff_uw, nsig_uw) # ----------------------------------------------------------------------------------- # # Solution note: # -------------- # The reduction of the uncertainty from the "naive" 11mm and 14mm to 0.61mm and 0.32mm # is an improvement by a factor of about 20 and 50, for the 30cm and 2m cases respectively, # This corresponds to 20*20 = 400 and 50*50 = 2500 as many measurements!!! 
# So if the naive uncertainty was to be decreased to the more clever (unweighted) one, # it would in the 2m case require the entire adult population of Copenhagen to measure it! # # Regarding the inclusion of measurements with +-30cm and +-2m bias, I note that the # uncertainty on the mean does not decrease from including these measurements, and I # therefore decide NOT to include them. Note that this conclusion can be done, even if # the central values are blinded! # # Finally, despite the very small uncertainties, the two results are still in very good # agreement with each other. # ----------------------------------------------------------------------------------- # # ----------------------------------------------------------------------------------- # # ----------------------------------------------------------------------------------- # # Detailed data analysis for weighted result - looking closer at the uncertainties: # ----------------------------------------------------------------------------------- # # ----------------------------------------------------------------------------------- # print "\n ------------------------ PULL - Cleaning weighted events ------------------------------" # ------------------------------------------------------------------------- # Make a list with all VALID and CORRECTED measurements for weighted case: # ------------------------------------------------------------------------- # First we have to "remake" the list of valid and corrected measurements, as this takes # on a new definition in the weighted case, in that we now also have to consider the # uncertainties (sometimes given as -1, i.e. not included). # One could possibly also require the uncertainties to be above some absolute minimum, # which I choose to apply for just one measurement (2m claiming 0.1mm uncertainty!). 
L30cm_corrected_w = [] eL30cm_corrected_w = [] for i, entry in enumerate(L30cm) : if (0.0 < entry < 5.0 and eL30cm[i] > 0.0) : if (Correct30cmOff and mean_uw30-0.45 < entry < mean_uw30-0.15) : L30cm_corrected_w.append(entry+0.30) elif (Correct30cmOff and mean_uw30+0.15 < entry < mean_uw30+0.45) : L30cm_corrected_w.append(entry-0.30) else : L30cm_corrected_w.append(entry) if (eL30cm[i] < 0.0005) : eL30cm_corrected_w.append(0.001) # Set minimum uncertainty to 0.5mm else : eL30cm_corrected_w.append(eL30cm[i]) L2m_corrected_w = [] eL2m_corrected_w = [] for i, entry in enumerate(L2m) : if (0.0 < entry < 5.0 and eL2m[i] > 0.0) : if (Correct2mOff and mean_uw30-3.0 < entry < mean_uw30-1.0) : L2m_corrected_w.append(entry+2.0) elif (Correct2mOff and mean_uw30+3.0 < entry < mean_uw30+1.0) : L2m_corrected_w.append(entry-2.0) else : L2m_corrected_w.append(entry) if (eL2m[i] < 0.00025) : eL2m_corrected_w.append(0.00025) # Set minimum uncertainty to 0.25mm else : eL2m_corrected_w.append(eL2m[i]) # ----------------------------------------------------------------------------------- # # Solution note: # -------------- # In order to consider a weighted result, we need to inspect the uncertainties, and # see if they make sense and if they carry any additional information. In principle # we could investigate, if the errors are correlated with the residual |value - mean|. # # However note that even perfect errors would not have a correlation coefficient very # high! The real test of whether the uncertainties are reliable, is obtained by making # a so-called pull plot, which means plotting the pull = (x_i - mean) / sigma_i # distribution. # If the errors are credible, it should be a unit Gaussian (think about why!). And if # measurements are too far away (in terms of their error), then they are not very # credible. 
# ----------------------------------------------------------------------------------- #

# Maximum number of sigma's we'll allow a measurement to be away from the (approximate) mean:
max_pull = 4.5

L30cm_accepted_w = []
eL30cm_accepted_w = []
L2m_accepted_w = []
eL2m_accepted_w = []

# Produce the pull distributions (and ensure that the errors are valid!):
pull_range = 12.0
Hist_30cm_Pull = TH1F("Hist_30cm_Pull", "Pull distribution - 30cm", 120, -pull_range, pull_range);
Hist_2m_Pull = TH1F("Hist_2m_Pull", "Pull distribution - 2m", 120, -pull_range, pull_range);

# 30cm case - fill pulls and accept only measurements within max_pull of the unweighted mean:
for i, entry in enumerate(L30cm_corrected_w) :
    pull = (entry-mean_uw30) / eL30cm_corrected_w[i]
    Hist_30cm_Pull.Fill(pull)
    if (abs(pull) < max_pull) :
        L30cm_accepted_w.append(entry)
        eL30cm_accepted_w.append(eL30cm_corrected_w[i])
    else :
        # Give warning, if any points are more than max_pull (= 4.5) sigma away!
        if (Verbose) :
            print("  30cm: Warning! Large pull: L = %6.4f +- %6.4f  pull = %6.2f"%(L30cm_corrected_w[i], eL30cm_corrected_w[i], pull))

# 2m case:
for i, entry in enumerate(L2m_corrected_w) :
    pull = (entry-mean_uw2m) / eL2m_corrected_w[i]
    Hist_2m_Pull.Fill(pull)
    if (abs(pull) < max_pull) :
        L2m_accepted_w.append(entry)
        eL2m_accepted_w.append(eL2m_corrected_w[i])
    else :
        if (Verbose) :
            print("  2m: Warning! Large pull: L = %6.4f +- %6.4f  pull = %6.2f"%(L2m_corrected_w[i], eL2m_corrected_w[i], pull))

# At the very least note, how many points were rejected:
print("The number of rejected 30cm measurements (weighted) was: %3d"%(len(L30cm_corrected_w) - len(L30cm_accepted_w)))
print("The number of rejected 2m measurements (weighted) was: %3d"%(len(L2m_corrected_w) - len(L2m_accepted_w)))


# Plot the pull distributions:
# --------------------------------------------------------------------
canvas3 = TCanvas( "canvas3", "canvas3", 150, 150, 1200, 600 )
canvas3.Divide(2,1)

canvas3.cd(1)
Hist_30cm_Pull.GetXaxis().SetRangeUser(-11.0, 11.0)   # From the histogram, we get the x-axis, and set the range.
Hist_30cm_Pull.GetXaxis().SetTitle("Pull")            # We give the x-axis a label.
Hist_30cm_Pull.GetYaxis().SetTitle("Frequency")       # We give the y-axis a label.
Hist_30cm_Pull.SetMarkerColor(kBlue)
Hist_30cm_Pull.SetMarkerStyle(20)

# Single Gaussian fit to the pull (should be close to a unit Gaussian, if errors are credible):
fit_pull30 = TF1("fit_pull30", "gaus", -max_pull, max_pull)
fit_pull30.SetLineColor(kRed)
fit_pull30.SetLineWidth(2)
Hist_30cm_Pull.Fit("fit_pull30", "LRQ")               # L: likelihood fit, R: range, Q: quiet.

pi = 3.141592653    # NOTE(review): math.pi would be preferable; kept numeric to reproduce the original fit exactly.
binwidth = 0.2      # Bin width of the pull histograms (2*12.0/120); used to normalize G2 to histogram counts.

def G2(x, p) :
    """Double Gaussian with a COMMON mean p[2]: p[0] = total normalisation,
    p[1] = fraction in the first (core) Gaussian, p[3]/p[4] = the two widths."""
    norm1 = 1.0 / math.sqrt(2.0*pi) / p[3]
    z1 = (x[0] - p[2]) / p[3]
    norm2 = 1.0 / math.sqrt(2.0*pi) / p[4]
    z2 = (x[0] - p[2]) / p[4]
    return p[0] * binwidth * (p[1] * norm1 * math.exp(-0.5*z1*z1) + (1.0-p[1]) * norm2 * math.exp(-0.5*z2*z2))

# Double Gaussian fit (core + wider tail component), drawn on top of the single one:
fit2_pull30 = TF1("fit2_pull30", G2, -max_pull, max_pull, 5)    # Last argument: number of fit parameters.
fit2_pull30.SetParameters(100, 0.5, 0.0, 0.5, 1.5)
fit2_pull30.SetLineColor(kMagenta)
fit2_pull30.SetLineWidth(2)
Hist_30cm_Pull.Fit("fit2_pull30", "LR+") # Note options: L for Likelihood, R for Range, Q for Quiet Hist_30cm_Pull.Draw() # Draw the histogram canvas3.cd(2) Hist_2m_Pull.GetXaxis().SetRangeUser(-11.0, 11.0) # From the histogram, we get the x-axis, and set the range. Hist_2m_Pull.GetXaxis().SetTitle("Pull") # We give the x-axis a label. Hist_2m_Pull.GetYaxis().SetTitle("Frequency") # We give the y-axis a label. Hist_2m_Pull.SetMarkerColor(kBlue) # Set the histogram line color to blue. Hist_2m_Pull.SetMarkerStyle(20) # Set the histogram line width to two. fit_pull2m = TF1("fit_pull2m", "gaus", -max_pull, max_pull) # Define a (predefined) Gaussian fitting function. fit_pull2m.SetLineColor(kRed) # Set the line color to red. fit_pull2m.SetLineWidth(2) # Set tne line width to 2. Hist_2m_Pull.Fit("fit_pull2m", "LRQ") # Note options: L for Likelihood, R for Range, Q for Quiet fit2_pull2m = TF1("fit_pull2m", G2, -max_pull, max_pull, 5) # Define a (predefined) Gaussian fitting function. fit2_pull2m.SetParameters(100, 0.5, 0.0, 0.5, 1.5) fit2_pull2m.SetLineColor(kMagenta) # Set the line color to red. fit2_pull2m.SetLineWidth(2) # Set tne line width to 2. Hist_2m_Pull.Fit("fit_pull2m", "LR+") # Note options: L for Likelihood, R for Range, Q for Quiet Hist_2m_Pull.Draw() # Draw the histogram canvas3.Update() if (SavePlots) : canvas3.SaveAs("Distribution_Pull.pdf") # ----------------------------------------------------------------------------------- # # Solution note: # -------------- # The pull distributions show, that the uncertainties given are reasonable in size # (sigma in the Gaussian fit is about 1.0, so one is dividing by reasonable errors!). # So the errors given by those who did a good measurement actually look reasonably # trustworthy, except that there are large peaks around zero, which are from serious # overestimates of the uncertainty (also giving room for too small errors). There is # thus no guarentee, that using the uncertainties will improve the result. 
# ----------------------------------------------------------------------------------- # print "\n --------------------------- WEIGHTED results ---------------------------------" # Calculate the weighted for 30cm ruler case: # ------------------------------------------- sum0_w30 = 0.0 sum1_w30 = 0.0 for i, entry in enumerate ( L30cm_accepted_w ) : sum0_w30 += 1.0 / sqr(eL30cm_accepted_w[i]) sum1_w30 += entry / sqr(eL30cm_accepted_w[i]) mean_w30 = sum1_w30 / sum0_w30 emean_w30 = sqrt(1.0 / sum0_w30) # If the pull was "bad", I would multiply by the pull width, # to make up for general (under-) overestimates of uncertainties. # Given that we now have the mean, calculate the Chi2: chi2_w30 = 0.0 for i, entry in enumerate ( L30cm_accepted_w ) : chi2_w30 += sqr((entry - mean_w30) / eL30cm_accepted_w[i]) print " 30cm: Weighted Mean = %7.5f +- %7.5f p(Chi2=%5.1f, Ndof=%3d) = %6.4f (N = %3d)"%(mean_w30+Lblind, emean_w30, chi2_w30, len(L30cm_accepted_w)-1, TMath.Prob(chi2_w30, len(L30cm_accepted_w)-1), len(L30cm_accepted_w)) # Calculate the weighted for 2m ruler case: # ----------------------------------------- sum0_w2m = 0.0 sum1_w2m = 0.0 for i, entry in enumerate ( L2m_accepted_w ) : sum0_w2m += 1.0 / sqr(eL2m_accepted_w[i]) sum1_w2m += entry / sqr(eL2m_accepted_w[i]) mean_w2m = sum1_w2m / sum0_w2m emean_w2m = sqrt(1.0 / sum0_w2m) # If the pull was "bad", I would multiply by the pull width, # to make up for general (under-) overestimates of uncertainties. 
# Given that we now have the mean, calculate the Chi2: chi2_w2m = 0.0 for i, entry in enumerate ( L2m_accepted_w ) : chi2_w2m += sqr((entry - mean_w2m) / eL2m_accepted_w[i]) print " 2m: Weighted Mean = %7.5f +- %7.5f p(Chi2=%5.1f, Ndof=%3d) = %6.4f (N = %3d)"%(mean_w2m+Lblind, emean_w2m, chi2_w2m, len(L2m_accepted_w)-1, TMath.Prob(chi2_w2m, len(L2m_accepted_w)-1), len(L2m_accepted_w)) # Again consider the improvements and relative difference: print " Improvement in error from selected unweighted to weighted - 30cm: %4.1f"%(emean_uw30 / emean_w30) print " Improvement in error from selected unweighted to weighted - 2m: %4.1f"%(emean_uw2m / emean_w2m) # ----------------------------------------------------------------------------------- # # Solution note: # -------------- # The weighted means make use of the uncertainties, and further decrease the error # on the final mean. In order for the uncertainties used to be realistic, one could # (as said) have scaled them by the width of the pull distribution (think about why!), # but that is not needed in this case, as they are very close to unity. # # The final results are indeed very accurate. As a cross check, we check if they are # consistent. # ----------------------------------------------------------------------------------- # diff_w = mean_w30 - mean_w2m ediff_w = sqrt(sqr(emean_w30) + sqr(emean_w2m)) nsig_w = diff_w / ediff_w print " Correspondence between weighted 30cm and 2m measurement: Diff = %7.5f +- %7.5f (%4.2f sigma)"%(diff_w, ediff_w, nsig_w) # ----------------------------------------------------------------------------------- # # Solution note: # -------------- # The results agree remarkably well (I suspected it would be worse!), and thus we can # actually have some confidence in the (very) small uncertainties. # # The completely final result is of course a weighted average of the two weighted # results! 
# # However, there might be a problem in that the result shifted significantly, when # moving from the unweighted to the weighted analysis!!! # ----------------------------------------------------------------------------------- # diff30_w_uw = mean_w30 - mean_uw30 ediff30_w_uw = sqrt(sqr(emean_w30) + sqr(emean_uw30)) nsig30_w_uw = diff30_w_uw / ediff30_w_uw print "\n Correspondence between unweighted and weighted 30cm measurement: Diff = %7.5f +- %7.5f (%4.2f sigma)"%(diff30_w_uw, ediff30_w_uw, nsig30_w_uw) diff2m_w_uw = mean_w2m - mean_uw2m ediff2m_w_uw = sqrt(sqr(emean_w2m) + sqr(emean_uw2m)) nsig2m_w_uw = diff2m_w_uw / ediff2m_w_uw print " Correspondence between unweighted and weighted 2m measurement: Diff = %7.5f +- %7.5f (%4.2f sigma)"%(diff2m_w_uw, ediff2m_w_uw, nsig2m_w_uw) # Correlation between (signed) residual and measurement weight (= 1/uncertainty^2): # ----------------------------------------------------------------------------------- # Hist_30cm_CorrResUnc = TH2F("Hist_30cm_CorrResUnc", "", 100, -0.1, 0.1, 60, 0.0, 1200000.0); Hist_2m_CorrResUnc = TH2F("Hist_2m_CorrResUnc", "", 100, -0.1, 0.1, 50, 0.0, 5000000.0); for i, entry in enumerate(L30cm_accepted_w) : Hist_30cm_CorrResUnc.Fill(entry-mean_uw30, 1.0/sqr(eL30cm_accepted_w[i])) for i, entry in enumerate(L2m_accepted_w) : Hist_2m_CorrResUnc.Fill(entry-mean_uw2m, 1.0/sqr(eL2m_accepted_w[i])) canvas4 = TCanvas( "canvas4", "canvas4", 180, 180, 1200, 600 ) canvas4.Divide(2,1) canvas4.cd(1) Hist_30cm_CorrResUnc.GetXaxis().SetRangeUser(-0.025, 0.025) Hist_30cm_CorrResUnc.GetXaxis().SetTitle("Residual") Hist_30cm_CorrResUnc.GetYaxis().SetTitle("Weight") Hist_30cm_CorrResUnc.SetMarkerColor(kBlue) Hist_30cm_CorrResUnc.SetMarkerStyle(20) Hist_30cm_CorrResUnc.Draw() canvas4.cd(2) Hist_2m_CorrResUnc.GetXaxis().SetRangeUser(-0.025, 0.025) Hist_2m_CorrResUnc.GetXaxis().SetTitle("Residual") Hist_2m_CorrResUnc.GetYaxis().SetTitle("Weight") Hist_2m_CorrResUnc.SetMarkerColor(kBlue) 
Hist_2m_CorrResUnc.SetMarkerStyle(20)
Hist_2m_CorrResUnc.Draw()

# Print the linear correlation coefficient between residual and weight:
print(" Correlation between residual and weight (30cm): %5.3f"%(Hist_30cm_CorrResUnc.GetCorrelationFactor()))
print(" Correlation between residual and weight (2m): %5.3f"%(Hist_2m_CorrResUnc.GetCorrelationFactor()))

canvas4.Update()
if (SavePlots) : canvas4.SaveAs("Correlation_ResidualUncertainty.pdf")

# ----------------------------------------------------------------------------------- #
# Solution note:
# --------------
# So, we do see a shift in the mean from the unweighted to the weighted result. It is
# right around that critical limit, where we start suspecting that it is a real effect
# and not just a statistical fluctuation.
# So, people who measure slightly longer tend to have a slightly smaller error. As the
# uncertainties are the result of partial guess work (and not delicate error estimation
# and propagation), I would here tend to think that this effect should at the very
# least be stated! But otherwise, the weighted result is usually the one to use.
# ----------------------------------------------------------------------------------- #

# ----------------------------------------------------------------------------------- #
# Fitting solution of RAW data:
# ----------------------------------------------------------------------------------- #

# ---------------------------------------- Fit 30cm case ---------------------------- #
canvas5 = TCanvas("canvas5", "canvas5", 250, 250, 1600, 1000)

minL = 3.0   # Lower edge of the plotted length range (m).
maxL = 3.7   # Upper edge of the plotted length range (m).

# Cosmetics for the raw 30cm length histogram:
Hist_L30cm.SetMaximum(110.0)                        # Leave headroom above the tallest bin.
Hist_L30cm.GetXaxis().SetRangeUser(minL, maxL)      # Zoom the x-axis to the chosen range.
Hist_L30cm.GetXaxis().SetTitle("Table length (m)")  # X-axis label.
Hist_L30cm.GetYaxis().SetTitle("Frequency")         # Y-axis label.
Hist_L30cm.SetLineColor(kBlue)                      # Blue histogram line.
Hist_L30cm.SetLineWidth(2)   # Histogram line width of two.
Hist_L30cm.Draw()            # Draw the histogram (the single-Gaussian fit is already attached).

# Legend for the 30cm fit plot (further fit entries are added as they are produced):
legend = TLegend(0.12, 0.68, 0.45, 0.88)
legend.SetLineColor(1)
legend.SetFillColor(0)
legend.AddEntry(Hist_L30cm, "Data", "L")
legend.AddEntry(fit_L30cm, "Simple Gaussian", "L")
legend.Draw()

canvas5.Update()
if (SavePlots) : canvas5.SaveAs("TableMeasurements_FinalFitAnalysis_30cm_1gauss.pdf")   # Format follows the extension.

binwidth = 0.005   # Bin width of the length histograms (re-used by the PDFs below).

# A more reasonable fitting function — double Gaussian with a common mean:
# ------------------------------------------------------------------------
def G2_30cm(x, p) :
    """Double Gaussian (shared mean) scaled to counts for the 30cm histogram.

    Parameters: p[0] = number of events, p[1] = core fraction, p[2] = mean,
    p[3] = core width, p[4] = wide width.
    """
    norm_core = 1.0 / sqrt(2.0*pi) / p[3]
    z_core = (x[0] - p[2]) / p[3]
    norm_wide = 1.0 / sqrt(2.0*pi) / p[4]
    z_wide = (x[0] - p[2]) / p[4]
    return p[0] * binwidth * (p[1] * norm_core * exp(-0.5 *z_core*z_core) + (1.0-p[1]) * norm_wide * exp(-0.5 *z_wide*z_wide))

fit2_L30cm = TF1("fit2_L30cm", G2_30cm, 3.32, 3.40, 5)
fit2_L30cm.SetParameters(500.0, 0.5, 3.36, 0.005, 0.018)   # Reasonable starting values.
fit2_L30cm.SetLineColor(kMagenta)
fit2_L30cm.SetLineWidth(2)
fit2_L30cm.SetNpx(1000)                                    # Draw the curve smoothly.
Hist_L30cm.Fit("fit2_L30cm", "R+")                         # R: range, +: keep earlier fits.
Hist_L30cm.Draw()   # Redraw with the double-Gaussian fit overlaid.

# Legend: add the new fit entry.
legend.AddEntry(fit2_L30cm, "Double Gaussian", "L")
legend.Draw()
canvas5.Update()
if (SavePlots) : canvas5.SaveAs("TableMeasurements_FinalFitAnalysis_30cm_2gauss.pdf")   # Format follows the extension.

# Expand the fitting function to include the two satellite peaks (one missed/extra
# 30cm ruler length, i.e. +-0.300 m) around the central one:
# ----------------------------------------------------------------------
def G2_3peak_30cm(x, p) :
    """Three double-Gaussian peaks at mean-0.3, mean, mean+0.3 (common widths).

    Parameters: p[0] = events in central peak, p[1] = core fraction,
    p[2] = central mean, p[3] = core width, p[4] = wide width,
    p[5] = events in low peak, p[6] = events in high peak.
    """
    # Given common widths, the normalisations are shared by all three peaks:
    norm1 = 1.0 / sqrt(2.0*pi) / p[3]
    norm2 = 1.0 / sqrt(2.0*pi) / p[4]
    # z-values for each peak — note the constant +-0.300 m shift, so only one mean is fitted:
    z1 = (x[0] - p[2]) / p[3]
    z2 = (x[0] - p[2]) / p[4]
    z1_low = ((x[0] + 0.300) - p[2]) / p[3]
    z2_low = ((x[0] + 0.300) - p[2]) / p[4]
    z1_high = ((x[0] - 0.300) - p[2]) / p[3]
    z2_high = ((x[0] - 0.300) - p[2]) / p[4]
    # Putting the three peaks together:
    G = p[1] * norm1 * exp(-0.5 *z1**2) + (1.0-p[1]) * norm2 * exp(-0.5 *z2**2)
    G_low = p[1] * norm1 * exp(-0.5 *z1_low**2) + (1.0-p[1]) * norm2 * exp(-0.5 *z2_low**2)
    G_high = p[1] * norm1 * exp(-0.5 *z1_high**2) + (1.0-p[1]) * norm2 * exp(-0.5 *z2_high**2)
    # Return the full expression, scaled to counts:
    return binwidth * (p[0] * G + p[5] * G_low + p[6] * G_high)

fit3_L30cm = TF1("fit3_L30cm", G2_3peak_30cm, 3.02, 3.70, 7)
fit3_L30cm.SetParameters(500.0, 0.5, 3.36, 0.003, 0.013, 20.0, 10.0)   # Reasonable starting values.
fit3_L30cm.SetLineColor(kGreen)
fit3_L30cm.SetLineWidth(2)
fit3_L30cm.SetNpx(1000)   # FIX (consistency): smooth drawing over the wide range, as for fit2/fit4.
Hist_L30cm.Fit("fit3_L30cm", "RL+")   # R: range, L: likelihood, +: keep earlier fits.
Hist_L30cm.Draw()   # Redraw with the three-peak fit overlaid.

# Legend: add the three-peak fit entry.
legend.AddEntry(fit3_L30cm, "Double Gaussian x 3", "L")
legend.Draw()
canvas5.Update()
if (SavePlots) : canvas5.SaveAs("TableMeasurements_FinalFitAnalysis_30cm_3_2gauss.pdf")   # Format follows the extension.

# Expand further: each of the three peaks gets a third, very wide Gaussian
# component (same mean) to soak up outliers:
# --------------------------------------------------------
def G2_3peak_outliers_30cm(x, p) :
    """Three triple-Gaussian peaks at mean-0.3, mean, mean+0.3.

    Parameters: p[0] = events in central peak, p[1] = core fraction,
    p[2] = central mean, p[3] = core width, p[4] = wide width,
    p[5] = events in low peak, p[6] = events in high peak,
    p[7] = outlier fraction, p[8] = outlier width.
    """
    # Common widths, so the normalisations are shared by all three peaks:
    norm1 = 1.0 / sqrt(2.0*pi) / p[3]
    norm2 = 1.0 / sqrt(2.0*pi) / p[4]
    norm_outl = 1.0 / sqrt(2.0*pi) / p[8]
    # z-values for each peak — constant +-0.300 m shifts, single fitted mean:
    z1 = (x[0] - p[2]) / p[3]
    z2 = (x[0] - p[2]) / p[4]
    z_outl = (x[0] - p[2]) / p[8]
    z1_low = ((x[0] + 0.300) - p[2]) / p[3]
    z2_low = ((x[0] + 0.300) - p[2]) / p[4]
    z_outl_low = ((x[0] + 0.300) - p[2]) / p[8]
    z1_high = ((x[0] - 0.300) - p[2]) / p[3]
    z2_high = ((x[0] - 0.300) - p[2]) / p[4]
    z_outl_high = ((x[0] - 0.300) - p[2]) / p[8]
    # Each peak: core + wide + outlier component, fractions summing to one:
    G = p[1] * norm1 * exp(-0.5 *z1**2) + (1.0-p[1]-p[7]) * norm2 * exp(-0.5 *z2**2) + p[7] * norm_outl * exp(-0.5 * z_outl**2)
    G_low = p[1] * norm1 * exp(-0.5 *z1_low**2) + (1.0-p[1]-p[7]) * norm2 * exp(-0.5 *z2_low**2) + p[7] * norm_outl * exp(-0.5 * z_outl_low**2)
    G_high = p[1] * norm1 * exp(-0.5 *z1_high**2) + (1.0-p[1]-p[7]) * norm2 * exp(-0.5 *z2_high**2) + p[7] * norm_outl * exp(-0.5 * z_outl_high**2)
    # (A flat PDF would be an alternative outlier model to the wide Gaussian.)
    # Return the full expression, scaled to counts:
    return binwidth * (p[0] * G + p[5] * G_low + p[6] * G_high)

fit4_L30cm = TF1("fit4_L30cm", G2_3peak_outliers_30cm, 3.02, 3.70, 9)
fit4_L30cm.SetParameters(500.0, 0.5, 3.36, 0.003, 0.013, 20.0, 10.0, 0.05, 0.10)   # Reasonable starting values.
# Name the parameters — self-documentation of the fit model:
fit4_L30cm.SetParNames("NevCentralPeak", "FracCore", "Mean", "WidthCore", "WidthWide", "NevLowPeak", "NevHighPeak", "FracOutliers", "WidthOutliers")
fit4_L30cm.SetLineColor(kBlack)
fit4_L30cm.SetLineWidth(2)
fit4_L30cm.SetNpx(1000)   # Draw the curve smoothly.
Hist_L30cm.Fit("fit4_L30cm", "RL+")   # R: range, L: likelihood, +: keep earlier fits.
Hist_L30cm.Draw()

# Rebuild the legend, now containing all four fit models:
legend = TLegend(0.12, 0.68, 0.45, 0.88)
legend.SetLineColor(1)
legend.SetFillColor(0)
legend.AddEntry(Hist_L30cm, "Data", "L")
legend.AddEntry(fit_L30cm, "Simple Gaussian", "L")
legend.AddEntry(fit2_L30cm, "Double Gaussian", "L")
legend.AddEntry(fit3_L30cm, "Double Gaussian x 3", "L")
legend.AddEntry(fit4_L30cm, "Triple Gaussian x 3", "L")
legend.Draw()
canvas5.Update()
if (SavePlots) : canvas5.SaveAs("TableMeasurements_FinalFitAnalysis_30cm.pdf")   # Format follows the extension.

# Summarise the fitted table lengths from the four 30cm models:
print(" Single Gaussian fit: L = %7.5f +- %7.5f m"%(fit_L30cm.GetParameter(1), fit_L30cm.GetParError(1)))
print(" Double Gaussian fit: L = %7.5f +- %7.5f m"%(fit2_L30cm.GetParameter(2), fit2_L30cm.GetParError(2)))
print(" 3 x Double Gaussian: L = %7.5f +- %7.5f m"%(fit3_L30cm.GetParameter(2), fit3_L30cm.GetParError(2)))
print(" 3 x Triple Gaussian (outlier) fit: L = %7.5f +- %7.5f m"%(fit4_L30cm.GetParameter(2), fit4_L30cm.GetParError(2)))

# ---------------------------------------- Fit 2m case ------------------------------ #
canvas6 = TCanvas("canvas6", "canvas6", 270, 270, 1600, 1000)

# Cosmetics for the raw 2m length histogram:
Hist_L2m.GetXaxis().SetRangeUser(minL, maxL)      # Zoom the x-axis to the chosen range.
Hist_L2m.GetXaxis().SetTitle("Table length (m)")  # X-axis label.
Hist_L2m.GetYaxis().SetTitle("Frequency")         # Y-axis label.
Hist_L2m.SetLineColor(kBlue)                      # Blue histogram line.
Hist_L2m.SetLineWidth(2)                          # Histogram line width of two.
binwidth = 0.005   # Bin width of the 2m length histogram (used by the PDF below).

# Reasonable fitting function for the 2m data — double Gaussian with a common mean:
def G2_2m(x, p) :
    """Double Gaussian (shared mean) scaled to counts for the 2m histogram.

    Parameters: p[0] = number of events, p[1] = core fraction, p[2] = mean,
    p[3] = core width, p[4] = wide width.
    """
    norm_core = 1.0 / sqrt(2.0*pi) / p[3]
    z_core = ((x[0]) - p[2]) / p[3]
    norm_wide = 1.0 / sqrt(2.0*pi) / p[4]
    z_wide = ((x[0]) - p[2]) / p[4]
    return p[0] * binwidth * (p[1] * norm_core * exp(-0.5 *z_core*z_core) + (1.0-p[1]) * norm_wide * exp(-0.5 *z_wide*z_wide))

fit3_L2m = TF1("fit3_L2m", G2_2m, 3.32, 3.40, 5)
fit3_L2m.SetParameters(50.0, 0.5, 3.36, 0.0025, 0.0075)   # Reasonable starting values.
fit3_L2m.SetLineColor(kMagenta)
fit3_L2m.SetLineWidth(2)
fit3_L2m.SetNpx(1000)                                     # Draw the curve smoothly.
Hist_L2m.Fit("fit3_L2m", "QR+")                           # Q: quiet, R: range, +: keep earlier fits.

Hist_L2m.GetXaxis().SetRangeUser(3.30, 3.42)   # Zoom in on the narrow 2m peak for the final plot.
Hist_L2m.Draw()
canvas6.Update()
if (SavePlots) : canvas6.SaveAs("TableMeasurements_FinalFitAnalysis_2m.pdf")   # Format follows the extension.

# One could extract the central-peak width as an estimate of the uncertainty on
# (good) individual measurements, e.g. via fit_L30cm/fit_L2m parameters 1 and 2.

raw_input('Press Enter to exit')