#!/usr/bin/env python
# ----------------------------------------------------------------------------------- #
#
# Python program for analysing measurements of the length of the lecture table in
# Auditorium A at NBI.
#
# There are two measurements, each with an estimated error, of the table length:
#   1: Measurement with a 30cm ruler.
#   2: Measurement with a 2m folding rule.
#
# Each person was asked not only to state the measurement, but also their estimated
# uncertainty. None of the persons could see the others' measurements, in order to get
# the largest degree of independence. Also, the 30cm ruler measurement was asked to be
# done first.
#
# Author: Troels C. Petersen (NBI)
# Email:  petersen@nbi.dk
# Date:   7th of September 2013
#
# ----------------------------------------------------------------------------------- #
# SUGGESTED SOLUTION!!!
# ----------------------------------------------------------------------------------- #

from ROOT import *
from math import sqrt, fabs

# Set the showing of statistics and fitting results (0 means off, 1 means on):
gStyle.SetOptStat("emr")    # See http://root.cern.ch/root/html/TStyle.html#TStyle:SetOptStat
gStyle.SetOptFit(1111)      # See http://root.cern.ch/root/html/TStyle.html#TStyle:SetOptFit

Include30cmOff = True
SavePlots = False

# ----------------------------------------------------------------------------------- #
# ----------------------------------------------------------------------------------- #
# Initial data analysis - look at the data and get "naive" values:
# ----------------------------------------------------------------------------------- #
# ----------------------------------------------------------------------------------- #

# Define two histograms with the lengths recorded:
Hist_L30cm = TH1F("Hist_L30cm", "Length estimates by 30cm ruler", 1000, 0.0, 5.0)
Hist_L2m   = TH1F("Hist_L2m",   "Length estimates by 2m folding rule", 1000, 0.0, 5.0)

# Define lists to contain the measurements, such that you can easily loop over them later!
L30cm  = []
eL30cm = []
L2m    = []
eL2m   = []

# Define sums, from which the mean and RMS can be calculated:
sum0 = 0.0
sum1 = 0.0
sum2 = 0.0
sum0_2m = 0.0
sum1_2m = 0.0
sum2_2m = 0.0

files = ["data_TableMeasurements2009.txt",
         "data_TableMeasurements2010.txt",
         "data_TableMeasurements2011.txt",
         "data_TableMeasurements2012.txt",
         "data_TableMeasurements2013.txt"]

# Loop over the files and open them:
for filename in files :
    with open( filename, 'r' ) as f :

        # Read and ignore the header lines:
        header1 = f.readline()
        header2 = f.readline()

        # Loop over lines and extract the variables of interest:
        for line in f :
            line = line.strip()
            columns = line.split()
            if (len(columns) == 4) :

                # Put the values read into the lists:
                L30cm.append(float(columns[0]))
                eL30cm.append(float(columns[1]))
                L2m.append(float(columns[2]))
                eL2m.append(float(columns[3]))

                print "  Measurement %3d:  L(30cmR): %6.3f +- %6.3f m    L(2mFR): %6.3f +- %6.3f m"%(int(sum0), L30cm[-1], eL30cm[-1], L2m[-1], eL2m[-1])

                # Histograms to show the distribution of ALL measurements (i.e. 0-5 meters):
                Hist_L30cm.Fill(L30cm[-1])
                Hist_L2m.Fill(L2m[-1])

                # Sums to get the "naive" means and RMSs:
                sum0 += 1
                sum1 += L30cm[-1]
                sum2 += L30cm[-1]*L30cm[-1]
                sum0_2m += 1
                sum1_2m += L2m[-1]
                sum2_2m += L2m[-1]*L2m[-1]

# ----------------------------------------------------------------------------------- #

# Calculate the mean, the RMS and from that the error on the mean:
mean       = sum1/sum0                            # Calculate the mean
sigma      = sqrt(sum2/sum0 - mean*mean)          # Calculate the RMS (remember: RMS = sqrt(<x^2> - <x>^2))
sigma_mean = sigma / sqrt(sum0)                   # Uncertainty of the mean is RMS / sqrt(N)

mean2m       = sum1_2m/sum0_2m                            # Calculate the mean
sigma2m      = sqrt(sum2_2m/sum0_2m - mean2m*mean2m)      # Calculate the RMS (remember: RMS = sqrt(<x^2> - <x>^2))
sigma_mean2m = sigma2m / sqrt(sum0_2m)                    # Uncertainty of the mean is RMS / sqrt(N)

# This is the "naive" mean, in that it simply uses all measurements without any inspection.
print "  30cm:  Naive Mean = %6.4f +- %6.4f    RMS = %6.4f  (N = %2d)"%(mean, sigma_mean, sigma, int(sum0))
print "  2m:    Naive Mean = %6.4f +- %6.4f    RMS = %6.4f  (N = %2d)"%(mean2m, sigma_mean2m, sigma2m, int(sum0_2m))
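
# Side note - a small cross-check (not needed for the analysis): the RMS formula used
# above, RMS = sqrt(<x^2> - <x>^2), is algebraically the same as the usual "mean of the
# squared deviations" form. The helper "rms_direct" below exists only for this illustration.
def rms_direct(values) :
    m = sum(values) / len(values)                                   # Plain mean of the list.
    return sqrt(sum((x-m)*(x-m) for x in values) / len(values))     # sqrt of the mean squared deviation.

print "  Cross-check (30cm):  RMS from sums = %6.4f   RMS from deviations = %6.4f"%(sigma, rms_direct(L30cm))
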
# ----------------------------------------------------------------------------------- #

# Plot the histograms on the screen in one canvas and fit them with Gaussians:
canvas = TCanvas( "canvas", "canvas", 50, 50, 1200, 600 )
canvas.Divide(1,2)

# Set all the trimmings for the histogram plotting:
canvas.cd(1)
Hist_L30cm.GetXaxis().SetRangeUser(3.0, 4.0)         # From the histogram, we get the x-axis, and set the range.
Hist_L30cm.GetXaxis().SetTitle("Table length (m)")   # We give the x-axis a label.
Hist_L30cm.GetYaxis().SetTitle("Frequency")          # We give the y-axis a label.
Hist_L30cm.SetLineColor(kBlue)                       # Set the histogram line color to blue.
Hist_L30cm.SetLineWidth(2)                           # Set the histogram line width to two.

# Define a reasonable fitting function:
fit_L30cm = TF1("fit_L30cm", "gaus", 3.25, 3.45)     # Define a (predefined) Gaussian fitting function.
fit_L30cm.SetParameters(50.0, 3.36, 0.01)            # Set the starting parameters to reasonable values.
fit_L30cm.SetLineColor(kRed)                         # Set the line color to red.
fit_L30cm.SetLineWidth(2)                            # Set the line width to 2.
Hist_L30cm.Fit("fit_L30cm", "R")                     # The option R restricts the fit to the given range.
Hist_L30cm.Draw()                                    # Draw the histogram.

canvas.cd(2)
Hist_L2m.GetXaxis().SetRangeUser(3.0, 4.0)           # From the histogram, we get the x-axis, and set the range.
Hist_L2m.GetXaxis().SetTitle("Table length (m)")     # We give the x-axis a label.
Hist_L2m.GetYaxis().SetTitle("Frequency")            # We give the y-axis a label.
Hist_L2m.SetLineColor(kBlue)                         # Set the histogram line color to blue.
Hist_L2m.SetLineWidth(2)                             # Set the histogram line width to two.

# Define a reasonable fitting function:
fit_L2m = TF1("fit_L2m", "gaus", 3.25, 3.45)         # Define a (predefined) Gaussian fitting function.
fit_L2m.SetParameters(50.0, 3.36, 0.01)              # Set the starting parameters to reasonable values.
fit_L2m.SetLineColor(kRed)                           # Set the line color to red.
fit_L2m.SetLineWidth(2)                              # Set the line width to 2.
Hist_L2m.Fit("fit_L2m", "R")                         # The option R restricts the fit to the given range.
Hist_L2m.Draw()

if (SavePlots) :
    canvas.SaveAs("TableMeasurements.png")           # Save plot (the format follows the extension name).

# ----------------------------------------------------------------------------------- #
# Solution note:
# --------------
# These fits show that the true value lies in the range 3.362-3.364, and that the
# uncertainty of "a typical but correct" measurement is 7mm for the 30cm ruler, and 3mm
# for the 2m folding rule.
# I decide to only consider measurements in the 3 sigma range obtained from the fit.
#
# In the 30cm case, I note that there is also a significant number of measurements
# +-30cm from this range. These are clearly measurements where the number of 30cm rulers
# has been off by one. As this is an understood problem, I decide to also include these
# measurements, provided that they, after the 30cm correction, are within the 3 sigma range.
# ----------------------------------------------------------------------------------- #


# ----------------------------------------------------------------------------------- #
# ----------------------------------------------------------------------------------- #
# Detailed data analysis for the unweighted result:
# ----------------------------------------------------------------------------------- #
# ----------------------------------------------------------------------------------- #

# Define sums for the unweighted results:
sum0_uw30 = 0.0
sum1_uw30 = 0.0
sum2_uw30 = 0.0
sum0_uw2m = 0.0
sum1_uw2m = 0.0
sum2_uw2m = 0.0

# Also define new lists with those results that are accepted to be within a reasonable range.
# This saves us applying the same requirement later, and also takes care of the 30cm correction!
L30cmGood  = []
eL30cmGood = []
L2mGood    = []
eL2mGood   = []

# Define the ranges within which measurements are accepted (+-3 sigma of the fitted peak):
min30cm = fit_L30cm.GetParameter(1) - 3.0*fit_L30cm.GetParameter(2)
max30cm = fit_L30cm.GetParameter(1) + 3.0*fit_L30cm.GetParameter(2)
print "  30cm:  Only accept measurements in the range: %5.3f - %5.3f"%(min30cm, max30cm)

min2m = fit_L2m.GetParameter(1) - 3.0*fit_L2m.GetParameter(2)
max2m = fit_L2m.GetParameter(1) + 3.0*fit_L2m.GetParameter(2)
print "  2m:    Only accept measurements in the range: %5.3f - %5.3f"%(min2m, max2m)

# Loop over the 30cm measurements, and include only those in an acceptable
# range (possibly corrected by +-30cm):
for i in range( len(L30cm) ) :

    if (min30cm < L30cm[i] < max30cm) :
        sum0_uw30 += 1.0
        sum1_uw30 += L30cm[i]
        sum2_uw30 += L30cm[i]*L30cm[i]
        L30cmGood.append(L30cm[i])
        eL30cmGood.append(eL30cm[i])

    if (Include30cmOff and min30cm < L30cm[i]-0.30 < max30cm) :
        sum0_uw30 += 1.0
        sum1_uw30 += L30cm[i]-0.30
        sum2_uw30 += (L30cm[i]-0.30)*(L30cm[i]-0.30)
        L30cmGood.append(L30cm[i]-0.30)
        eL30cmGood.append(eL30cm[i])

    if (Include30cmOff and min30cm < L30cm[i]+0.30 < max30cm) :
        sum0_uw30 += 1.0
        sum1_uw30 += L30cm[i]+0.30
        sum2_uw30 += (L30cm[i]+0.30)*(L30cm[i]+0.30)
        L30cmGood.append(L30cm[i]+0.30)
        eL30cmGood.append(eL30cm[i])

mean_uw30  = sum1_uw30/sum0_uw30                                  # Calculate the mean
sigma_uw30 = sqrt(sum2_uw30/sum0_uw30 - mean_uw30*mean_uw30)      # Calculate the RMS
emean_uw30 = sigma_uw30 / sqrt(sum0_uw30)                         # Uncertainty of the mean is RMS / sqrt(N)
print "  30cm:  Unweighted Mean = %7.5f +- %7.5f    RMS = %7.5f  (N = %2d)"%(mean_uw30, emean_uw30, sigma_uw30, int(sum0_uw30))

# Loop over the 2m measurements, only accepting measurements in the narrow range:
for i in range( len(L2m) ) :
    if (min2m < L2m[i] < max2m) :
        sum0_uw2m += 1.0
        sum1_uw2m += L2m[i]
        sum2_uw2m += L2m[i]*L2m[i]
        L2mGood.append(L2m[i])
        eL2mGood.append(eL2m[i])

mean_uw2m  = sum1_uw2m/sum0_uw2m                                  # Calculate the mean
sigma_uw2m = sqrt(sum2_uw2m/sum0_uw2m - mean_uw2m*mean_uw2m)      # Calculate the RMS
emean_uw2m = sigma_uw2m / sqrt(sum0_uw2m)                         # Uncertainty of the mean is RMS / sqrt(N)
print "  2m:    Unweighted Mean = %7.5f +- %7.5f    RMS = %7.5f  (N = %2d)"%(mean_uw2m, emean_uw2m, sigma_uw2m, int(sum0_uw2m))

# ----------------------------------------------------------------------------------- #
# Solution note:
# --------------
# The reduction of the uncertainty from the "naive" 138mm to 0.7mm is an improvement by
# a factor of about 190, which corresponds to 190*190 = 36100 times as many measurements!!!
# So if the naive uncertainty were to be decreased to the more clever (unweighted) one,
# it would require the entire grown-up Danish population to measure it!
#
# Regarding the inclusion of measurements with a +-30cm bias, I note that the uncertainty
# on the mean goes down from 0.75mm to 0.72mm, which means that they contribute. As the
# two means are very close, the additional measurements are probably OK. But it is also
# OK not to include them.
# ----------------------------------------------------------------------------------- #
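
# Side note - a minimal sketch of the scaling argument in the note above: since the
# error on a mean of N independent measurements scales as spread/sqrt(N), reaching a
# target uncertainty with single measurements of a given spread requires roughly
# N = (spread/target)^2 measurements. Here it is evaluated with the naive RMS and the
# unweighted error on the mean from this script (the exact number depends on the files read).
N_needed = (sigma / emean_uw30)**2
print "  30cm:  Naive measurements needed to reach the unweighted precision: roughly %9.0f"%(N_needed)
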

# ----------------------------------------------------------------------------------- #
# ----------------------------------------------------------------------------------- #
# Detailed data analysis for the weighted result - looking closer at the uncertainties:
# ----------------------------------------------------------------------------------- #
# ----------------------------------------------------------------------------------- #

# Investigate if the errors are correlated with the residual |value - mean|:
# ---------------------------------------------------------------------------

# To measure the correlation, we need to define several sums:
sum0_30  = 0.0
sumx_30  = 0.0
sumx2_30 = 0.0
sumy_30  = 0.0
sumy2_30 = 0.0
sumxy_30 = 0.0

Hist_30cm_RvsE = TH2F("Hist_30cm_RvsE", "Length residual vs. uncertainty estimates", 100, 0.0, 0.02, 100, 0.0, 0.05)

for i in range( len(L30cmGood) ) :
    residual = fabs(L30cmGood[i]-mean_uw30)
    # Check that the error and the residual are in a reasonable range
    # (the measurement itself is already in the accepted list):
    if (eL30cmGood[i] > 0.0 and eL30cmGood[i] < 0.05 and residual < 0.02) :
        sum0_30  += 1
        sumx_30  += residual
        sumx2_30 += residual*residual
        sumy_30  += eL30cmGood[i]
        sumy2_30 += eL30cmGood[i]*eL30cmGood[i]
        sumxy_30 += residual*eL30cmGood[i]
        Hist_30cm_RvsE.Fill(residual, eL30cmGood[i])     # 2D (scatter) plot!

# Calculate the linear correlation coefficient:
meanx_30  = sumx_30 / sum0_30
sigmax_30 = sqrt(sumx2_30/sum0_30 - meanx_30*meanx_30)
meany_30  = sumy_30 / sum0_30
sigmay_30 = sqrt(sumy2_30/sum0_30 - meany_30*meany_30)
rhoxy_30  = (sumxy_30/sum0_30 - meanx_30*meany_30) / (sigmax_30 * sigmay_30)
print "  30cm:  Mean x: %6.4f   RMS x: %6.4f   Mean y: %6.4f   RMS y: %6.4f   rho(xy): %6.3f"%(meanx_30, sigmax_30, meany_30, sigmay_30, rhoxy_30)

# Repeat for the 2m folding rule:
# -------------------------------
sum0_2m  = 0.0
sumx_2m  = 0.0
sumx2_2m = 0.0
sumy_2m  = 0.0
sumy2_2m = 0.0
sumxy_2m = 0.0

Hist_2m_RvsE = TH2F("Hist_2m_RvsE", "Length residual vs. uncertainty estimates", 100, 0.0, 0.02, 100, 0.0, 0.05)
for i in range( len(L2mGood) ) :
    residual = fabs(L2mGood[i]-mean_uw2m)
    if (eL2mGood[i] > 0.0 and eL2mGood[i] < 0.05 and residual < 0.02) :
        sum0_2m  += 1
        sumx_2m  += residual
        sumx2_2m += residual*residual
        sumy_2m  += eL2mGood[i]
        sumy2_2m += eL2mGood[i]*eL2mGood[i]
        sumxy_2m += residual*eL2mGood[i]
        Hist_2m_RvsE.Fill(residual, eL2mGood[i])

# Calculate the linear correlation coefficient:
meanx_2m  = sumx_2m / sum0_2m
sigmax_2m = sqrt(sumx2_2m/sum0_2m - meanx_2m*meanx_2m)
meany_2m  = sumy_2m / sum0_2m
sigmay_2m = sqrt(sumy2_2m/sum0_2m - meany_2m*meany_2m)
rhoxy_2m  = (sumxy_2m/sum0_2m - meanx_2m*meany_2m) / (sigmax_2m * sigmay_2m)
print "  2m:    Mean x: %6.4f   RMS x: %6.4f   Mean y: %6.4f   RMS y: %6.4f   rho(xy): %6.3f"%(meanx_2m, sigmax_2m, meany_2m, sigmay_2m, rhoxy_2m)

# Draw the uncertainties vs. the residuals:
canvas2 = TCanvas( "canvas2", "canvas2", 100, 100, 1200, 600 )
canvas2.Divide(2,1)

canvas2.cd(1)
Hist_30cm_RvsE.GetXaxis().SetRangeUser(0.0, 0.02)        # From the histogram, we get the x-axis, and set the range.
Hist_30cm_RvsE.GetXaxis().SetTitle("Residual (m)")       # We give the x-axis a label.
Hist_30cm_RvsE.GetYaxis().SetTitle("Uncertainty (m)")    # We give the y-axis a label.
Hist_30cm_RvsE.SetMarkerColor(kBlue)                     # Set the marker color to blue.
Hist_30cm_RvsE.SetMarkerStyle(20)                        # Set the marker style to full circles.
Hist_30cm_RvsE.Draw()                                    # Draw the histogram.

canvas2.cd(2)
Hist_2m_RvsE.GetXaxis().SetRangeUser(0.0, 0.01)          # From the histogram, we get the x-axis, and set the range.
Hist_2m_RvsE.GetXaxis().SetTitle("Residual (m)")         # We give the x-axis a label.
Hist_2m_RvsE.GetYaxis().SetTitle("Uncertainty (m)")      # We give the y-axis a label.
Hist_2m_RvsE.SetMarkerColor(kBlue)                       # Set the marker color to blue.
Hist_2m_RvsE.SetMarkerStyle(20)                          # Set the marker style to full circles.
Hist_2m_RvsE.Draw()                                      # Draw the histogram.

if (SavePlots) :
    canvas2.SaveAs("Correlations_RvsE.png")              # Save plot (the format follows the extension name).

# ----------------------------------------------------------------------------------- #
# Solution note:
# --------------
# The purpose of the question about correlation was mainly to ensure that you tried to
# calculate this yourself! Note, however, that even perfect errors would not give a very
# high correlation coefficient. Still, it gives some hint of whether the uncertainties
# given are worth anything or not. And it is always VERY healthy to see a plot of the
# distributions, to check for anything strange.
#
# The real test of whether the uncertainties are reliable is obtained by making a
# so-called pull plot, i.e. plotting the distribution of the pull, pull = (x_i - mean) / sigma_i.
# If the errors are credible, it should be a unit Gaussian (think about why!).
# ----------------------------------------------------------------------------------- #
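
# Side note - a toy illustration of the claim above (toy numbers, not the table data):
# if measurements scatter around the true value with exactly their quoted uncertainty,
# the pulls come out with mean ~0 and width ~1. A "true" length of 3.363m and a 7mm
# quoted uncertainty are assumed purely for illustration; gRandom is ROOT's global
# random number generator.
Ntoy      = 1000
sigma_toy = 0.007
pulls_toy = []
for itoy in range(Ntoy) :
    x_toy = gRandom.Gaus(3.363, sigma_toy)            # Toy measurement with the assumed spread.
    pulls_toy.append((x_toy - 3.363) / sigma_toy)     # Pull, using the (correct) quoted uncertainty.
mean_ptoy = sum(pulls_toy) / Ntoy
rms_ptoy  = sqrt(sum(p*p for p in pulls_toy) / Ntoy - mean_ptoy*mean_ptoy)
print "  Toy check:  pull mean = %5.2f   pull width = %5.2f   (expect ~0 and ~1)"%(mean_ptoy, rms_ptoy)
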
# Pull distributions:
Hist_30cm_Pull = TH1F("Hist_30cm_Pull", "Pull distribution - 30cm", 100, -10.0, 10.0)
for i in range( len(L30cmGood) ) :
    if (eL30cmGood[i] > 0.0) :
        pull = (L30cmGood[i]-mean_uw30) / eL30cmGood[i]
        if (fabs(pull) > 5.0) :     # Give a warning, if any points are more than 5 sigma away!
            print "  30cm:  Warning! Large pull:  measurement = %6.4f +- %6.4f    pull = %6.4f"%(L30cmGood[i], eL30cmGood[i], pull)
        Hist_30cm_Pull.Fill(pull)

Hist_2m_Pull = TH1F("Hist_2m_Pull", "Pull distribution - 2m", 100, -10.0, 10.0)
for i in range( len(L2mGood) ) :
    if (eL2mGood[i] > 0.0) :
        pull = (L2mGood[i]-mean_uw2m) / eL2mGood[i]
        if (fabs(pull) > 5.0) :     # Give a warning, if any points are more than 5 sigma away!
            print "  2m:    Warning! Large pull:  measurement = %6.4f +- %6.4f    pull = %6.4f"%(L2mGood[i], eL2mGood[i], pull)
        Hist_2m_Pull.Fill(pull)

canvas3 = TCanvas( "canvas3", "canvas3", 150, 150, 1200, 600 )
canvas3.Divide(2,1)

canvas3.cd(1)
Hist_30cm_Pull.GetXaxis().SetRangeUser(-10.0, 10.0)      # From the histogram, we get the x-axis, and set the range.
Hist_30cm_Pull.GetXaxis().SetTitle("Pull")               # We give the x-axis a label.
Hist_30cm_Pull.GetYaxis().SetTitle("Frequency")          # We give the y-axis a label.
Hist_30cm_Pull.SetMarkerColor(kBlue)                     # Set the marker color to blue.
Hist_30cm_Pull.SetMarkerStyle(20)                        # Set the marker style to full circles.
fit_pull30 = TF1("fit_pull30", "gaus", -4.0, 4.0)        # Define a (predefined) Gaussian fitting function.
fit_pull30.SetLineColor(kRed)                            # Set the line color to red.
fit_pull30.SetLineWidth(2)                               # Set the line width to 2.
Hist_30cm_Pull.Fit("fit_pull30", "RL")                   # Note the two options: L for Likelihood and R for Range.
Hist_30cm_Pull.Draw()                                    # Draw the histogram.

canvas3.cd(2)
Hist_2m_Pull.GetXaxis().SetRangeUser(-10.0, 10.0)        # From the histogram, we get the x-axis, and set the range.
Hist_2m_Pull.GetXaxis().SetTitle("Pull")                 # We give the x-axis a label.
Hist_2m_Pull.GetYaxis().SetTitle("Frequency")            # We give the y-axis a label.
Hist_2m_Pull.SetMarkerColor(kBlue)                       # Set the marker color to blue.
Hist_2m_Pull.SetMarkerStyle(20)                          # Set the marker style to full circles.
fit_pull2m = TF1("fit_pull2m", "gaus", -4.0, 4.0)        # Define a (predefined) Gaussian fitting function.
fit_pull2m.SetLineColor(kRed)                            # Set the line color to red.
fit_pull2m.SetLineWidth(2)                               # Set the line width to 2.
Hist_2m_Pull.Fit("fit_pull2m", "RL")                     # Note the two options: L for Likelihood and R for Range.
Hist_2m_Pull.Draw()                                      # Draw the histogram.

if (SavePlots) :
    canvas3.SaveAs("Distribution_Pull.png")              # Save plot (the format follows the extension name).

# ----------------------------------------------------------------------------------- #
# Solution note:
# --------------
# The pull distributions show that the given uncertainties are somewhat too large (the
# sigma of the Gaussian fit is less than 1.0, so one is dividing by too large errors!),
# but not by very much. So the errors given by those who did a good measurement actually
# look reasonably trustworthy, except that there are large peaks around zero, which come
# from serious overestimates of the uncertainty.
#
# Also, the pull distribution shows that one of the 30cm measurements is more than
# 8 sigma away from the mean. This measurement I will discard, as either the measurement
# has an error, or its uncertainty is much underestimated.
# ----------------------------------------------------------------------------------- #

# Calculate the weighted means:
# ---------------------------------------------------------------------------

# Define sums for the weighted results (note that "Nw30" is only for counting the
# number of measurements used, and is not used in the calculations):
Nw30 = 0
sum0_w30 = 0.0
sum1_w30 = 0.0

# Also check what the minimum and maximum uncertainties are (and possibly enlarge them, if not trustworthy!):
emin30 = 999.9
emax30 = 0.0

for i in range( len(L30cmGood) ) :
    if (eL30cmGood[i] > 0.0) :
        pull = (L30cmGood[i]-mean_uw30) / eL30cmGood[i]
        if (fabs(pull) < 5.0) :
            Nw30 += 1
            elowerlimit = max(0.002, eL30cmGood[i])     # Set the minimum possible error to 2mm (Python's built-in max)!
            sum0_w30 += 1.0 / (elowerlimit*elowerlimit)
            sum1_w30 += L30cmGood[i] / (elowerlimit*elowerlimit)
            if (eL30cmGood[i] < emin30) : emin30 = eL30cmGood[i]
            if (eL30cmGood[i] > emax30) : emax30 = eL30cmGood[i]

# Calculate the weighted mean (and check the error range):
mean_w30  = sum1_w30 / sum0_w30
emean_w30 = sqrt(1.0 / sum0_w30) * fit_pull30.GetParameter(2)    # I multiply by the pull width to make up for
                                                                 # the general overestimate of the uncertainties.
print "  30cm:  Weighted mean = %7.5f +- %7.5f  (N = %3d)"%(mean_w30, emean_w30, Nw30)
print "  30cm:  Smallest error is: %7.5f   Is that realistic?"%emin30
print "  30cm:  Largest error is:  %7.5f   Is that realistic?"%emax30

# Repeat for the 2m results:
# --------------------------
Nw2m = 0
sum0_w2m = 0.0
sum1_w2m = 0.0
emin2m = 999.9
emax2m = 0.0

for i in range( len(L2mGood) ) :
    if (eL2mGood[i] > 0.0) :
        pull = (L2mGood[i]-mean_uw2m) / eL2mGood[i]
        if (fabs(pull) < 5.0) :
            Nw2m += 1
            elowerlimit = max(0.001, eL2mGood[i])       # Set the minimum possible error to 1mm!
            sum0_w2m += 1.0 / (elowerlimit*elowerlimit)
            sum1_w2m += L2mGood[i] / (elowerlimit*elowerlimit)
            if (eL2mGood[i] < emin2m) : emin2m = eL2mGood[i]
            if (eL2mGood[i] > emax2m) : emax2m = eL2mGood[i]

# Calculate the weighted mean (and check the error range):
mean_w2m  = sum1_w2m / sum0_w2m
emean_w2m = sqrt(1.0 / sum0_w2m) * fit_pull2m.GetParameter(2)    # I multiply by the pull width to make up for
                                                                 # the general overestimate of the uncertainties.
print "  2m:    Weighted mean = %7.5f +- %7.5f  (N = %3d)"%(mean_w2m, emean_w2m, Nw2m)
print "  2m:    Smallest error is: %7.5f   Is that realistic?"%emin2m
print "  2m:    Largest error is:  %7.5f   Is that realistic?"%emax2m

# ----------------------------------------------------------------------------------- #
# Solution note:
# --------------
# The weighted means make use of the uncertainties, and further decrease the error
# on the final mean. In order for the uncertainties used to be realistic, I scale
# them by the width of the pull distribution (think about why!).
#
# The final result is indeed very accurate. In order to check if the two results are
# consistent, we consider their difference and its uncertainty, both in the unweighted
# and the weighted case.
# ----------------------------------------------------------------------------------- #
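
# Side note - a minimal sketch of why scaling by the pull width works (toy numbers, not
# the table data): if every quoted uncertainty is a common factor c too large, the pull
# width comes out as 1/c, while the "raw" weighted error sqrt(1/sum(1/sigma_i^2)) is a
# factor c too large - so their product recovers the correct error on the mean.
c_toy      = 2.0                                      # Assume all errors are overestimated by a factor 2.
strue_toy  = 0.003                                    # Assume a true 3mm single-measurement spread.
Nmeas_toy  = 100
quoted_toy = c_toy * strue_toy                        # The (too large) quoted uncertainty.
pullw_toy  = strue_toy / quoted_toy                   # Pull width = 1/c.
eraw_toy   = quoted_toy / sqrt(Nmeas_toy)             # Raw weighted error for N equal weights.
print "  Toy check:  raw weighted error = %6.4f   scaled by pull width = %6.4f   true error = %6.4f"%(eraw_toy, eraw_toy*pullw_toy, strue_toy/sqrt(Nmeas_toy))
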

def sqr(a) :
    return a*a

diff_uw  = mean_uw30 - mean_uw2m
ediff_uw = sqrt(sqr(emean_uw30) + sqr(emean_uw2m))
print "  The difference between the unweighted results is:  diff = %7.5f +- %7.5f  (%3.1f sigma)"%(diff_uw, ediff_uw, fabs(diff_uw/ediff_uw))

diff_w  = mean_w30 - mean_w2m
ediff_w = sqrt(sqr(emean_w30) + sqr(emean_w2m))
print "  The difference between the weighted results is:    diff = %7.5f +- %7.5f  (%3.1f sigma)"%(diff_w, ediff_w, fabs(diff_w/ediff_w))

# ----------------------------------------------------------------------------------- #
# Solution note:
# --------------
# The results agree remarkably well (I suspected it would be worse!), and thus we can
# actually have some confidence in the very small uncertainties.
#
# The completely final result is of course a weighted average of the two!
# ----------------------------------------------------------------------------------- #
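
# A minimal sketch of that final combination - a standard 1/sigma^2 weighted average of
# the two weighted results, assuming they are independent (the names below are only
# introduced for this combination):
w30 = 1.0 / sqr(emean_w30)
w2m = 1.0 / sqr(emean_w2m)
mean_final  = (w30*mean_w30 + w2m*mean_w2m) / (w30 + w2m)
emean_final = sqrt(1.0 / (w30 + w2m))
print "  Combined (30cm & 2m) weighted result:  L = %7.5f +- %7.5f m"%(mean_final, emean_final)
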
raw_input('Press Enter to exit')


# ----------------------------------------------------------------------------------- #
#
# Start by taking a close look at the data, first by inspecting the numbers in the data
# file, and then by considering the histograms produced by running the macro.
#
# To begin with, only consider the 30cm ruler measurements from 2013, and disregard the
# estimated/guessed uncertainties. You can then expand from there, as guided by the
# questions below.
#
# Questions:
# ----------
# 1) Consider the calculated mean and width, and make sure that you understand how
#    they were obtained. Is the result as you would expect it? And do you think that
#    it is close to the best possible (i.e. most precise) result?
#
# 2) Do any of the measurements look wrong/bad/suspicious? Do you see any repeated
#    mistakes? Would you correct or exclude any of the measurements, and how would
#    you justify this?
#    If so, make sure that your program does this, and recalculate the result (with sums).
#    Also, fit the length measurements with a Gaussian distribution. Should the binning
#    be changed for these fits? And is the Gaussian distribution justified?
#
# 3) Write the number of measurements you accepted for the length determination, along
#    with your result (mean and error on the mean), into the questionnaire.
#
# You should at least reach this point in the exercise! And preferably much further!!!
# Of the questions below, you should perhaps focus mostly on 5, 6 and 7.
#
# 4) (Slightly difficult question)
#    Is there any correlation between the errors on the measurements and the actual
#    distance to the mean (this is called the "residual")? That is, do the errors
#    contain useful information, which should be used in the calculation of the most
#    accurate estimate of the mean?
#    In all simplicity - did those who had a measurement close to the mean also put down
#    a small uncertainty? What is the correlation between the residuals and the
#    uncertainties? Yes, define new sums as discussed in the lectures, and calculate
#    the correlation!
#
# 5) Compare the size of the residuals with the size of the uncertainties. Were people
#    too optimistic or too pessimistic (or were the uncertainty estimates/guesses good)?
#
# 6) Whether or not you found a large enough correlation above, try to calculate the
#    weighted mean. Do you need to discard more dubious measurements? Did the result
#    improve? Do you trust this result more?
#
# 7) Repeat the above questions for the 2m folding rule.
#
# 8) How much did the single measurement uncertainty go down from the 30cm ruler case
#    to the 2m folding rule? Can you quantify how much better the 2m folding rule is?
#
#
# Advanced questions:
# -------------------
# 1) Does the length of the table seem to be different when measured with a 30cm and a
#    2m ruler? Quantify this statement! I.e. what is the difference, and what is the
#    uncertainty on that difference? Is this significantly away from 0? How big an
#    effect on the 30cm ruler would this correspond to?
#
# 2) If you were asked for the best estimate of the length of the table, what would
#    you do? (If possible, read Bevington page 58 bottom!)
#
#
# ----------------------------------------------------------------------------------- #