#!/usr/bin/env python # ----------------------------------------------------------------------------------- # # # Python program for analysing measurements of the length of the lecture table in # Auditorium A at NBI. # # There are two measurements each with an estimated error of the table length: # 1: Measurement with a 30cm ruler. # 2: Measurement with a 2m folding ruler. # # Each person was asked not only to state the measurement, but also their estimated # uncertainty. None of the persons could see others measurements in order to get the # largest degree of independence. Also, the 30cm ruler measurement was asked to be # done first. Last but not least, it was encouraged to include millimeters, even if # the precision was significant less (for binning reasons). # # Author: Troels C. Petersen (NBI) # Email: petersen@nbi.dk # Date: 19th of November 2016 # # ----------------------------------------------------------------------------------- # from ROOT import * import os, math # Set the showing of statistics and fitting results (0 means off, 1 means on): gStyle.SetOptStat("emr"); # See http://root.cern.ch/root/html/TStyle.html#TStyle:SetOptStat gStyle.SetOptFit(1111); # See http://root.cern.ch/root/html/TStyle.html#TStyle:SetOptFit # Define if things are printed to the screen, and written to files: verbose = True SavePlots = True # Define the function "sqr", which squares whatever you give it: def sqr(a) : return a*a # ----------------------------------------------------------------------------------- # # Define a histogram with the lengths recorded: Hist_L30cm = TH1F("Hist_L30cm", "Lengths estimates by 30cm ruler", 200, 0.0, 5.0); # NOTE: I thought a bit about the binning, and decided that since one of course should include # ALL measurements (at least in the first plot), then as one would also like to be able # to note features of less than 1cm, I choose 1000 bins, giving a bin size of 5mm. # However, additional plots might want to choose differently for both range and binning. # Define lists to contain the measurements, such that you can easily loop over them later! L30cm = [] eL30cm = [] L2m = [] eL2m = [] # Data files: infiles = ["data_TableMeasurements2009.txt", "data_TableMeasurements2010.txt", "data_TableMeasurements2011.txt", "data_TableMeasurements2012.txt", "data_TableMeasurements2013.txt", "data_TableMeasurements2014.txt", "data_TableMeasurements2015.txt", "data_TableMeasurements2016.txt"] # Loop over files and open them for infile in infiles : with open( infile, 'r' ) as f : # Read and ignore header lines (the ones showing what the numbers represents): header1 = f.readline() header2 = f.readline() # Loop over lines in the files and extract variables of interest: for line in f: line = line.strip() # Remove all "strange" characters columns = line.split() # Split the line into its parts (columns is now a list) if (len(columns) == 4) : # Check that there are four numbers! # Record (and print) the measurements: L30cm.append(float(columns[0])) eL30cm.append(float(columns[1])) L2m.append(float(columns[2])) eL2m.append(float(columns[3])) if (verbose) : print " Measurement %3d: L(30cm): %6.3f +- %6.3f m L(2m): %6.3f +- %6.3f m"%(int(len(L30cm)), L30cm[-1], eL30cm[-1], L2m[-1], eL2m[-1]) # print " %6.3f %6.3f %6.3f %6.3f"%(L30cm[-1], eL30cm[-1], L2m[-1], eL2m[-1]) # Plot measurements into histogram: Hist_L30cm.Fill(L30cm[-1]) # Make sure that you are warned about data lines NOT used! else : print "Warning: Line read did not have 4 entries but ", len(columns) f.close() # ----------------------------------------------------------------------------------- # # Calculate raw/naive/uncorrected mean, RMS and from that error on the mean: if (len(L30cm) > 1) : # Check that there is more than one measurement (as we divide by N-1)! # Calculate the mean: sum1_30raw = 0.0 for i30 in range( len(L30cm) ) : sum1_30raw += L30cm[i30] mean_30raw = sum1_30raw / len(L30cm) # Calculate mean # Calculate the RMS: sum2_30raw = 0.0 for i30 in range( len(L30cm) ) : sum2_30raw += sqr(L30cm[i30] - mean_30raw) RMS_30raw = sqrt(sum2_30raw / (len(L30cm)-1)) # Calculate RMS emean_30raw = RMS_30raw / sqrt(len(L30cm)) # Uncertainty on mean is RMS / sqrt(N) # This is the "naive" mean, in that it simply uses all measurements without any inspection. print "\n RAW: Mean = %6.4f +- %6.4f m RMS = %6.4f m (N = %3d)\n"%(mean_30raw, emean_30raw, RMS_30raw, int(len(L30cm))) # ----------------------------------------------------------------------------------- # # Plot histogram on the screen in one canvas: canvas = TCanvas( "canvas", "canvas", 50, 50, 1200, 600 ) # Set all the trimmings for the histogram plotting: Hist_L30cm.GetXaxis().SetRangeUser(0.0, 5.0) # From the histogram, we get the x-axis, and set the range. Hist_L30cm.GetXaxis().SetTitle("Table length (m)") # We give the x-axis a label. Hist_L30cm.GetYaxis().SetTitle("Frequency") # We give the y-axis a label. Hist_L30cm.SetLineColor(kBlue) # Set the histogram line color to blue. Hist_L30cm.SetLineWidth(2) # Set the histogram line width to two. # Define a resonable fitting function: fit_L30cm = TF1("fit_L30cm", "gaus", 0.0, 5.0) # Define a (predefined) Gaussian fitting function. fit_L30cm.SetLineColor(kRed) # Set the line color to red. fit_L30cm.SetLineWidth(2) # Set tne line width to 2. # Fit the histogram (with a likelihood function in the range the function is defined in): Hist_L30cm.Fit("fit_L30cm", "LR") # Note the two options: L for Likelihood and R for Range. # I will soon introduce what "likelihood" means. Hist_L30cm.Draw() # Draw the histogram canvas.Update() if (SavePlots): canvas.SaveAs("TableMeasurements_30cmRuler_Raw.pdf") # Save plot (format follow extension name) # ----------------------------------------------------------------------------------- # raw_input('Press Enter to exit') # ----------------------------------------------------------------------------------- # # # Start by taking a close look at the data, first by inspecting the numbers in the data # file, and then by considering the histograms produced by running the macro. # # To begin with, only consider the 30cm ruler measurements, and disregard the # estimated/guessed uncertainties. You can then expand from there, as guided below by # questions. # # Questions: # ---------- # 1) Consider the calculated/naive mean and width, and make sure that you understand how # they were performed. Is the result as you would expect it? And do you think that # it is close to the best possible (i.e. most accurate and precise) estimate? # NOTE: Make sure that you know the difference between accuracy and precision!!! # See "Common definition" in: http://en.wikipedia.org/wiki/Accuracy_and_precision # # 2) Do any of the measurements looks wrong/bad/suspecious? Do you see any repeated # mistakes done? Would you correct or exclude any of the measurements and how would # you justify this? # Also, fit the length measurements with a Gaussian distribution in a small range # around what you believe is the true value. Should the binning be changed for these # fits? And is the Gaussian distribution justified? # # 3) Once you have selected the measurements you want to use, calculate the mean, RMS # and uncertainty on the mean (using sums). How much did your result improve in # precision? # # ------------------- Now repeat the above for the 2m folding rule ---------------------- # # 4) How much better/worse is the single measurement uncertainty from the 30cm ruler case # to the 2m folding rule? # # 5) The "Pull" distribution is defined as the plot of z = (x - ) / sigma(x). If the # measurements and uncertainties are good, then it should give a unit Gaussian. Is # that the case? And thus, were the uncertainty estimates/guesses reasonable? # If not, then the pull RMS is often used to scale the errors to a reasonable level. # # 6) Try to calculate the weighted mean. Do you need to discard more dubious measurements? # Did the result improve further? # # 7) Does the length of the table seems to be different when measured with a 30cm and a 2m # ruler? Quantify this statement! I.e. what is the difference, and what is the # uncertainty on that difference? Is this significantly away from 0? # # 8) If you were asked for the best estimate of the length of the table, what would # you do? (If posssible, read Bevington page 58 bottom!) # # # Advanced questions: # ------------------- # 9) Is there any correlation between the errors on the measurements and the actual # distance to the mean (this is called the "residual")? That is, do the errors # contain useful information, that should be used in the calculation of the most # accurate estimate of the mean? # In all simplicity - did those who had a measurement close to the mean also put down a # small uncertainty? What is the correlation between residuals and the uncertainties? # Yes, define new sums as discussed in the lectures, and calculate the correlation! # #----------------------------------------------------------------------------------