#!/usr/bin/env python # ----------------------------------------------------------------------------------- # # # Python program for analysing measurements of the length of the lecture table in # Auditorium A at NBI. # # There are two measurements each with estimated error of the table length: # 1: Measurement with a 30cm ruler. # 2: Measurement with a 2m folding ruler. # # Each person was asked not only to state the measurement, but also their estimated # uncertainty. None of the persons could see others measurements in order to get the # largest degree of independence. Also, the 30cm ruler measurement was asked to be # done first. # # Author: Troels C. Petersen (NBI) # Email: petersen@nbi.dk # Date: 4th of September 2012 # # Author: Lars Egholm Pedersen (NBI/CERN) # Email: egholm@cern.ch # # ----------------------------------------------------------------------------------- # from ROOT import * import os, math # Set the showing of statistics and fitting results (0 means off, 1 means on): gStyle.SetOptStat("emr"); # See http://root.cern.ch/root/html/TStyle.html#TStyle:SetOptStat gStyle.SetOptFit(1111); # See http://root.cern.ch/root/html/TStyle.html#TStyle:SetOptFit SavePlots = False # Define two histograms with the lengths recorded and the correlation between length residual and uncertainty: Hist_L30cm = TH1F("Hist_L30cm", "Lengths estimates by 30cm ruler", 250, 0.0, 5.0); Hist_30cm_RvsE = TH2F("Hist_30cm_LvsE", "Length residual vs. uncertainty estimates", 250, 0.0, 0.05, 250, 0.0, 0.05); # Define lists to contain the measurements, such that you can easily loop over them later! L30cm = [] eL30cm = [] L2m = [] eL2m = [] # Define sums, from which the mean and RMS can be calculated: sum0 = 0.0 sum1 = 0.0 sum2 = 0.0 files = ["data_TableMeasurements2009.txt", "data_TableMeasurements2010.txt", "data_TableMeasurements2011.txt", "data_TableMeasurements2012.txt", "data_TableMeasurements2013.txt"] # Loop over files and open them for file in files : with open( file, 'r' ) as f : # Read and ignore header lines: header1 = f.readline() header2 = f.readline() # Loop over lines and extract variables of interest for line in f: line = line.strip() columns = line.split() if (len(columns) == 4) : L30cm.append(float(columns[0])) eL30cm.append(float(columns[1])) L2m.append(float(columns[2])) eL2m.append(float(columns[3])) print " Measurement %3d: L(30cmR): %6.3f +- %6.3f m L(2mFR): %6.3f +- %6.3f m"%(int(sum0), L30cm[-1], eL30cm[-1], L2m[-1], eL2m[-1]) # Hist_L30cm.Fill(L30cm[-1]) # Hist_30cm_RvsE.Fill(fabs(L30cm[-1]-3.360), eL30cm[-1]) sum0 += 1 sum1 += L30cm[-1] sum2 += L30cm[-1]*L30cm[-1] f.close() # ----------------------------------------------------------------------------------- # # Calculate mean, RMS and from that error on the mean: sum1 = sum1/sum0 # Calculate the mean sum2 = sqrt(sum2/sum0 - sum1*sum1) # Calculate the RMS (remember: RMS = sqrt( - ^2)) esum1 = sum2 / sqrt(sum0) # Uncertainty of the mean is RMS / sqrt(N) # This is the "naive" mean, in that it simply uses all measurements without any inspection. print " Mean = %6.4f +- %6.4f RMS = %6.4f"%(sum1, esum1, sum2) # ----------------------------------------------------------------------------------- # # Plot histogram on the screen in one canvas: # canvas = TCanvas( "canvas", "canvas", 50, 50, 1200, 600 ) # Set all the trimmings for the histogram plotting: Hist_L30cm.GetXaxis().SetRangeUser(0.0, 5.0) # From the histogram, we get the x-axis, and set the range. Hist_L30cm.SetMinimum(0) # We also set the minimum. Hist_L30cm.GetXaxis().SetTitle("Table length (m)") # We give the x-axis a label. Hist_L30cm.GetYaxis().SetTitle("Frequency") # We give the y-axis a label. Hist_L30cm.SetLineColor(kBlue) # Set the histogram line color to blue. Hist_L30cm.SetLineWidth(2) # Set the histogram line width to two. # Define a resonable fitting function: fit_L30cm = TF1("fit_L30cm", "gaus", 0.0, 5.0) # Define a (predefined) Gaussian fitting function. fit_L30cm.SetLineColor(kRed) # Set the line color to red. fit_L30cm.SetLineWidth(2) # Set tne line width to 2. # Fit the histogram (with a likelihood function in the range the function is defined in): # Hist_L30cm.Fit("fit_L30cm", "LR") # Note the two options: L for Likelihood and R for Range. # Hist_L30cm.Draw() # Draw the histogram # if (SavePlots): # canvas.SaveAs("TableMeasurements_30cmRuler.png") # Save plot (format follow extension name) # ----------------------------------------------------------------------------------- # raw_input('Press Enter to exit') # ----------------------------------------------------------------------------------- # # # Start by taking a close look at the data, first by inspecting the numbers in the data # file, and then by considering the histograms produced by running the macro. # # To begin with, only consider the 30cm ruler measurements from 2013, and disregard the # estimated/guessed uncertainties. You can then expand from there, as guided below with # questions. # # Questions: # ---------- # 1) Consider the calculated mean and width, and make sure that you understand how # they were performed. Is the result as you would expect it? And do you think that # it is close to the best possible (i.e. most precise) result? # # 2) Do any of the measurements looks wrong/bad/suspecious? Do you see any repeated # mistakes done? Would you correct or exclude any of the measurements and how would # you justify this? # If so, make sure that your program does this, and recalculate the result (with sums). # Also, fit the length measurements with a Gaussian distribution. Should the binning # be changed for these fits? And is the Gaussian distribution justified? # # 3) Write the number of measurements you accepted for the length determination along # with your result (mean and error on mean) into the questionaire. # # You should at least reach this point in the exercise! And preferably much further!!! # Of the questions below, you should perhaps focus mostly on 5, 6 and 7. # # 4) (Slightly difficult question) # Is there any correlation between the errors on the measurements and the actual # distance to the mean (this is called the "residual")? That is, do the errors # contain useful information, that should be used in the calculation of the most # accurate estimate of the mean? # In all simplicity - did those who had a measurement close to the mean also put down a # small uncertainty? What is the correlation between residuals and the uncertainties? # Yes, define new sums as discussed in the lectures, and calculate the correlation! # # 5) Compare the size of the residuals with the size of the uncertainties. Were people too # optimistic or too pesimistic (or were the uncertainty estimates/guesses good)? # # 6) Whether or not you found a large enough correlation above, try to calculate the # weighted mean. Do you need to discard more dubious measurements? Did the result # improve? Do you trust this result more? # # 7) Repeat the above questions for the 2m folding rule. # # 8) How much did the single measurement uncertainty go down from the 30cm ruler case # to the 2m folding rule? Can you quantify how much better the 2m folding rule is? # # # Advanced questions: # ------------------- # 1) Does the length of the table seems to be different when measured with a 30cm and a 2m # ruler? Quantify this statement! I.e. what is the difference, and what is the # uncertainty on that difference? Is this significantly away from 0? How big an # effect on the 30cm ruler would this correspond to? # # 2) If you were asked for the best estimate of the length of the table, what would # you do? (If posssible, read Bevington page 58 bottom!) # # # #----------------------------------------------------------------------------------