#!/usr/bin/env python # ----------------------------------------------------------------------------------- # # # Python program for analysing measurements of the length of the lecture table in # Auditorium A at NBI. # # There are two measurements each with an estimated error of the table length: # 1: Measurement with a 30cm ruler. # 2: Measurement with a 2m folding ruler. # # Each person was asked not only to state the measurement, but also their estimated # uncertainty. None of the persons could see others measurements in order to get the # largest degree of independence. Also, the 30cm ruler measurement was asked to be # done first. # # Author: Troels C. Petersen (NBI) # Email: petersen@nbi.dk # Date: 22nd of August 2014 # # ----------------------------------------------------------------------------------- # from ROOT import * import os, math # Set the showing of statistics and fitting results (0 means off, 1 means on): gStyle.SetOptStat("emr"); # See http://root.cern.ch/root/html/TStyle.html#TStyle:SetOptStat gStyle.SetOptFit(1111); # See http://root.cern.ch/root/html/TStyle.html#TStyle:SetOptFit verbose = True SavePlots = False # Define two histograms with the lengths recorded and the correlation between length residual and uncertainty: Hist_L30cm = TH1F("Hist_L30cm", "Lengths estimates by 30cm ruler", 250, 0.0, 5.0); # Define lists to contain the measurements, such that you can easily loop over them later! L30cm = [] eL30cm = [] L2m = [] eL2m = [] # Define sums, from which the mean and RMS can be calculated: sum0_30raw = 0.0 sum1_30raw = 0.0 sum2_30raw = 0.0 # Data files: infiles = ["data_TableMeasurements2009.txt", "data_TableMeasurements2010.txt", "data_TableMeasurements2011.txt", "data_TableMeasurements2012.txt", "data_TableMeasurements2013.txt", "data_TableMeasurements2014.txt"] # Loop over files and open them for infile in infiles : with open( infile, 'r' ) as f : # Read and ignore header lines: header1 = f.readline() header2 = f.readline() # Loop over lines and extract variables of interest for line in f: line = line.strip() columns = line.split() if (len(columns) == 4) : # Check that there are four numbers! # Record (and print) the measurements: L30cm.append(float(columns[0])) eL30cm.append(float(columns[1])) L2m.append(float(columns[2])) eL2m.append(float(columns[3])) if (verbose) : print " Measurement %3d: L(30cmR): %6.3f +- %6.3f m L(2mFR): %6.3f +- %6.3f m"%(int(sum0_30raw), L30cm[-1], eL30cm[-1], L2m[-1], eL2m[-1]) # Plot measurements: Hist_L30cm.Fill(L30cm[-1]) # Make sums to get a quick check of result: sum0_30raw += 1 sum1_30raw += L30cm[-1] sum2_30raw += L30cm[-1]*L30cm[-1] # Make sure that you are warned about data lines NOT used! else : print "Warning: Line read did not have 4 entries but ", len(columns) f.close() # ----------------------------------------------------------------------------------- # # Calculate raw/naive/uncorrected mean, RMS and from that error on the mean: if (sum0_30raw > 1) : mean_30raw = sum1_30raw/sum0_30raw # Calculate the mean RMS_30raw = sqrt(sum2_30raw/sum0_30raw - mean_30raw*mean_30raw) # Calculate the RMS emean_30raw = RMS_30raw / sqrt(sum0_30raw) # Uncertainty of the mean is RMS / sqrt(N) # This is the "naive" mean, in that it simply uses all measurements without any inspection. print " Mean = %6.4f +- %6.4f RMS = %6.4f"%(mean_30raw, emean_30raw, RMS_30raw) # ----------------------------------------------------------------------------------- # # Plot histogram on the screen in one canvas: canvas = TCanvas( "canvas", "canvas", 50, 50, 1200, 600 ) # Set all the trimmings for the histogram plotting: Hist_L30cm.GetXaxis().SetRangeUser(0.0, 5.0) # From the histogram, we get the x-axis, and set the range. Hist_L30cm.GetXaxis().SetTitle("Table length (m)") # We give the x-axis a label. Hist_L30cm.GetYaxis().SetTitle("Frequency") # We give the y-axis a label. Hist_L30cm.SetLineColor(kBlue) # Set the histogram line color to blue. Hist_L30cm.SetLineWidth(2) # Set the histogram line width to two. # Define a resonable fitting function: fit_L30cm = TF1("fit_L30cm", "gaus", 0.0, 5.0) # Define a (predefined) Gaussian fitting function. fit_L30cm.SetLineColor(kRed) # Set the line color to red. fit_L30cm.SetLineWidth(2) # Set tne line width to 2. # Fit the histogram (with a likelihood function in the range the function is defined in): Hist_L30cm.Fit("fit_L30cm", "LR") # Note the two options: L for Likelihood and R for Range. Hist_L30cm.Draw() # Draw the histogram if (SavePlots): canvas.SaveAs("TableMeasurements_30cmRuler_Raw.pdf") # Save plot (format follow extension name) # ----------------------------------------------------------------------------------- # raw_input('Press Enter to exit') # ----------------------------------------------------------------------------------- # # # Start by taking a close look at the data, first by inspecting the numbers in the data # file, and then by considering the histograms produced by running the macro. # # To begin with, only consider the 30cm ruler measurements, and disregard the # estimated/guessed uncertainties. You can then expand from there, as guided below by # questions. # # Questions: # ---------- # 1) Consider the calculated/naive mean and width, and make sure that you understand how # they were performed. Is the result as you would expect it? And do you think that # it is close to the best possible (i.e. most accurate and precise) estimate? # NOTE: Make sure that you know the difference between accuracy and precision!!! # See "Common definition" in: http://en.wikipedia.org/wiki/Accuracy_and_precision # # 2) Do any of the measurements looks wrong/bad/suspecious? Do you see any repeated # mistakes done? Would you correct or exclude any of the measurements and how would # you justify this? # Also, fit the length measurements with a Gaussian distribution in a small range. # Should the binning be changed for these fits? And is the Gaussian distribution # justified? # # 3) Once you have selected the measurements you want to use, calculate the mean, RMS # and uncertainty on the mean (using sums). How much did your result improve in # precision? # # 4) Does the length of the table seems to be different between the two sides? Quantify # this statement! I.e. what is the difference, and what is the uncertainty on this # difference? # # ------------------- Now repeat the above for the 2m folding rule ---------------------- # # 5) How much better/worse is the single measurement uncertainty from the 30cm ruler case # to the 2m folding rule? # # 6) The "Pull" distribution is defined as the plot of z = (x - ) / sigma(x). If the # measurements and uncertainties are good, then it should give a unit Gaussian. Is # that the case? And thus, were the uncertainty estimates/guesses reasonable? # # 7) Try to calculate the weighted mean. Do you need to discard more dubious measurements? # Did the result improve further? # # 8) Does the length of the table seems to be different when measured with a 30cm and a 2m # ruler? Quantify this statement! I.e. what is the difference, and what is the # uncertainty on that difference? Is this significantly away from 0? How big an # effect on the 30cm ruler would this correspond to? # # 9) If you were asked for the best estimate of the length of the table, what would # you do? (If posssible, read Bevington page 58 bottom!) # # # Advanced questions: # ------------------- # 10) Is there any correlation between the errors on the measurements and the actual # distance to the mean (this is called the "residual")? That is, do the errors # contain useful information, that should be used in the calculation of the most # accurate estimate of the mean? # In all simplicity - did those who had a measurement close to the mean also put down a # small uncertainty? What is the correlation between residuals and the uncertainties? # Yes, define new sums as discussed in the lectures, and calculate the correlation! # # #---------------------------------------------------------------------------------- # # Key questions: # -------------- # https://docs.google.com/forms/d/1SjQViqu0dPFTITnxkiMXxv1VIeYG8J8oB0fqYBvOlmA/viewform?usp=send_form # Please answer the following questions both for the 30cm ruler and for the 2m folding # rule! # * Write the number of measurements you accepted for the length determination along # with your result (mean and error on mean) into the questionaire. Do this both for # the unweighted and weighted result. # * What is the Chi2 and its probability when fitting a Gaussian to the measurements? # #----------------------------------------------------------------------------------