#!/usr/bin/env python
# ----------------------------------------------------------------------------------- #
#
# Python program for analysing measurements of the length of the lecture table in
# Auditorium A at NBI.
#
# There are two measurements, each with an estimated error, of the table length:
#   1: Measurement with a 30cm ruler.
#   2: Measurement with a 2m folding rule.
#
# Each person was asked not only to state the measurement, but also their estimated
# uncertainty. None of the persons could see the others' measurements, in order to get
# the largest degree of independence. Also, the 30cm ruler measurement was asked to be
# done first.
#
# Author: Troels C. Petersen (NBI)
# Email:  petersen@nbi.dk
# Date:   7th of September 2013
#
# ----------------------------------------------------------------------------------- #
# SUGGESTED SOLUTION!!!
# ----------------------------------------------------------------------------------- #

from ROOT import *
from math import sqrt, fabs

# Set the showing of statistics and fitting results (0 means off, 1 means on):
gStyle.SetOptStat("emr")    # See http://root.cern.ch/root/html/TStyle.html#TStyle:SetOptStat
gStyle.SetOptFit(1111)      # See http://root.cern.ch/root/html/TStyle.html#TStyle:SetOptFit

Include30cmOff = True
SavePlots = False

# ----------------------------------------------------------------------------------- #
# ----------------------------------------------------------------------------------- #
# Initial data analysis - look at the data and get "naive" values:
# ----------------------------------------------------------------------------------- #
# ----------------------------------------------------------------------------------- #

# Define two histograms with the lengths recorded:
Hist_L30cm = TH1F("Hist_L30cm", "Length estimates by 30cm ruler", 1000, 0.0, 5.0)
Hist_L2m   = TH1F("Hist_L2m",   "Length estimates by 2m folding rule", 1000, 0.0, 5.0)

# Define lists to contain the measurements, such that you can easily loop over them later!
L30cm  = []
eL30cm = []
L2m    = []
eL2m   = []

# Define sums, from which the mean and RMS can be calculated:
sum0 = 0.0
sum1 = 0.0
sum2 = 0.0
sum0_2m = 0.0
sum1_2m = 0.0
sum2_2m = 0.0

files = ["data_TableMeasurements2009.txt",
         "data_TableMeasurements2010.txt",
         "data_TableMeasurements2011.txt",
         "data_TableMeasurements2012.txt",
         "data_TableMeasurements2013.txt"]

# Loop over the files and open them:
for filename in files :
    with open( filename, 'r' ) as f :

        # Read and ignore the header lines:
        header1 = f.readline()
        header2 = f.readline()

        # Loop over lines and extract the variables of interest:
        for line in f :
            line = line.strip()
            columns = line.split()
            if (len(columns) == 4) :

                # Put the values read into the lists:
                L30cm.append(float(columns[0]))
                eL30cm.append(float(columns[1]))
                L2m.append(float(columns[2]))
                eL2m.append(float(columns[3]))

                print "  Measurement %3d:  L(30cmR): %6.3f +- %6.3f m    L(2mFR): %6.3f +- %6.3f m"%(int(sum0), L30cm[-1], eL30cm[-1], L2m[-1], eL2m[-1])

                # Histograms to show the distribution of ALL measurements (i.e. 0-5 meters):
                Hist_L30cm.Fill(L30cm[-1])
                Hist_L2m.Fill(L2m[-1])

                # Sums to get the "naive" means and RMSs:
                sum0 += 1
                sum1 += L30cm[-1]
                sum2 += L30cm[-1]*L30cm[-1]
                sum0_2m += 1
                sum1_2m += L2m[-1]
                sum2_2m += L2m[-1]*L2m[-1]

# ----------------------------------------------------------------------------------- #

# Calculate the mean, the RMS and from that the error on the mean:
mean       = sum1/sum0                            # Calculate the mean
sigma      = sqrt(sum2/sum0 - mean*mean)          # Calculate the RMS (remember: RMS = sqrt(<x^2> - <x>^2))
sigma_mean = sigma / sqrt(sum0)                   # Uncertainty of the mean is RMS / sqrt(N)

mean2m       = sum1_2m/sum0_2m                            # Calculate the mean
sigma2m      = sqrt(sum2_2m/sum0_2m - mean2m*mean2m)      # Calculate the RMS (remember: RMS = sqrt(<x^2> - <x>^2))
sigma_mean2m = sigma2m / sqrt(sum0_2m)                    # Uncertainty of the mean is RMS / sqrt(N)

# This is the "naive" mean, in that it simply uses all measurements without any inspection.
print "  30cm:  Naive Mean = %6.4f +- %6.4f    RMS = %6.4f  (N = %2d)"%(mean, sigma_mean, sigma, int(sum0))
print "  2m:    Naive Mean = %6.4f +- %6.4f    RMS = %6.4f  (N = %2d)"%(mean2m, sigma_mean2m, sigma2m, int(sum0_2m))
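
# Side note - a small cross-check (not needed for the analysis): the RMS formula used
# above, RMS = sqrt(<x^2> - <x>^2), is algebraically the same as the usual "mean of the
# squared deviations" form. The helper "rms_direct" below exists only for this illustration.
def rms_direct(values) :
    m = sum(values) / len(values)                                   # Plain mean of the list.
    return sqrt(sum((x-m)*(x-m) for x in values) / len(values))     # sqrt of the mean squared deviation.

print "  Cross-check (30cm):  RMS from sums = %6.4f   RMS from deviations = %6.4f"%(sigma, rms_direct(L30cm))
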
# ----------------------------------------------------------------------------------- #

# Plot the histograms on the screen in one canvas and fit them with Gaussians:
canvas = TCanvas( "canvas", "canvas", 50, 50, 1200, 600 )
canvas.Divide(1,2)

# Set all the trimmings for the histogram plotting:
canvas.cd(1)
Hist_L30cm.GetXaxis().SetRangeUser(3.0, 4.0)         # From the histogram, we get the x-axis, and set the range.
Hist_L30cm.GetXaxis().SetTitle("Table length (m)")   # We give the x-axis a label.
Hist_L30cm.GetYaxis().SetTitle("Frequency")          # We give the y-axis a label.
Hist_L30cm.SetLineColor(kBlue)                       # Set the histogram line color to blue.
Hist_L30cm.SetLineWidth(2)                           # Set the histogram line width to two.

# Define a reasonable fitting function:
fit_L30cm = TF1("fit_L30cm", "gaus", 3.25, 3.45)     # Define a (predefined) Gaussian fitting function.
fit_L30cm.SetParameters(50.0, 3.36, 0.01)            # Set the starting parameters to reasonable values.
fit_L30cm.SetLineColor(kRed)                         # Set the line color to red.
fit_L30cm.SetLineWidth(2)                            # Set the line width to 2.
Hist_L30cm.Fit("fit_L30cm", "R")                     # The option R restricts the fit to the given range.
Hist_L30cm.Draw()                                    # Draw the histogram.

canvas.cd(2)
Hist_L2m.GetXaxis().SetRangeUser(3.0, 4.0)           # From the histogram, we get the x-axis, and set the range.
Hist_L2m.GetXaxis().SetTitle("Table length (m)")     # We give the x-axis a label.
Hist_L2m.GetYaxis().SetTitle("Frequency")            # We give the y-axis a label.
Hist_L2m.SetLineColor(kBlue)                         # Set the histogram line color to blue.
Hist_L2m.SetLineWidth(2)                             # Set the histogram line width to two.

# Define a reasonable fitting function:
fit_L2m = TF1("fit_L2m", "gaus", 3.25, 3.45)         # Define a (predefined) Gaussian fitting function.
fit_L2m.SetParameters(50.0, 3.36, 0.01)              # Set the starting parameters to reasonable values.
fit_L2m.SetLineColor(kRed)                           # Set the line color to red.
fit_L2m.SetLineWidth(2)                              # Set the line width to 2.
Hist_L2m.Fit("fit_L2m", "R")                         # The option R restricts the fit to the given range.
Hist_L2m.Draw()

if (SavePlots) :
    canvas.SaveAs("TableMeasurements.png")           # Save plot (the format follows the extension name).

# ----------------------------------------------------------------------------------- #
# Solution note:
# --------------
# These fits show that the true value lies in the range 3.362-3.364, and that the
# uncertainty of "a typical but correct" measurement is 7mm for the 30cm ruler, and 3mm
# for the 2m folding rule.
# I decide to only consider measurements in the 3 sigma range obtained from the fit.
#
# In the 30cm case, I note that there is also a significant number of measurements
# +-30cm from this range. These are clearly measurements where the number of 30cm rulers
# has been off by one. As this is an understood problem, I decide to also include these
# measurements, provided that they, after the 30cm correction, are within the 3 sigma range.
# ----------------------------------------------------------------------------------- #


# ----------------------------------------------------------------------------------- #
# ----------------------------------------------------------------------------------- #
# Detailed data analysis for the unweighted result:
# ----------------------------------------------------------------------------------- #
# ----------------------------------------------------------------------------------- #

# Define sums for the unweighted results:
sum0_uw30 = 0.0
sum1_uw30 = 0.0
sum2_uw30 = 0.0
sum0_uw2m = 0.0
sum1_uw2m = 0.0
sum2_uw2m = 0.0

# Also define new lists with those results that are accepted to be within a reasonable range.
# This saves us applying the same requirement later, and also takes care of the 30cm correction!
L30cmGood  = []
eL30cmGood = []
L2mGood    = []
eL2mGood   = []

# Define the ranges within which measurements are accepted (+-3 sigma of the fitted peak):
min30cm = fit_L30cm.GetParameter(1) - 3.0*fit_L30cm.GetParameter(2)
max30cm = fit_L30cm.GetParameter(1) + 3.0*fit_L30cm.GetParameter(2)
print "  30cm:  Only accept measurements in the range: %5.3f - %5.3f"%(min30cm, max30cm)

min2m = fit_L2m.GetParameter(1) - 3.0*fit_L2m.GetParameter(2)
max2m = fit_L2m.GetParameter(1) + 3.0*fit_L2m.GetParameter(2)
print "  2m:    Only accept measurements in the range: %5.3f - %5.3f"%(min2m, max2m)

# Loop over the 30cm measurements, and include only those in an acceptable
# range (possibly corrected by +-30cm):
for i in range( len(L30cm) ) :

    if (min30cm < L30cm[i] < max30cm) :
        sum0_uw30 += 1.0
        sum1_uw30 += L30cm[i]
        sum2_uw30 += L30cm[i]*L30cm[i]
        L30cmGood.append(L30cm[i])
        eL30cmGood.append(eL30cm[i])

    if (Include30cmOff and min30cm < L30cm[i]-0.30 < max30cm) :
        sum0_uw30 += 1.0
        sum1_uw30 += L30cm[i]-0.30
        sum2_uw30 += (L30cm[i]-0.30)*(L30cm[i]-0.30)
        L30cmGood.append(L30cm[i]-0.30)
        eL30cmGood.append(eL30cm[i])

    if (Include30cmOff and min30cm < L30cm[i]+0.30 < max30cm) :
        sum0_uw30 += 1.0
        sum1_uw30 += L30cm[i]+0.30
        sum2_uw30 += (L30cm[i]+0.30)*(L30cm[i]+0.30)
        L30cmGood.append(L30cm[i]+0.30)
        eL30cmGood.append(eL30cm[i])

mean_uw30  = sum1_uw30/sum0_uw30                                  # Calculate the mean
sigma_uw30 = sqrt(sum2_uw30/sum0_uw30 - mean_uw30*mean_uw30)      # Calculate the RMS
emean_uw30 = sigma_uw30 / sqrt(sum0_uw30)                         # Uncertainty of the mean is RMS / sqrt(N)
print "  30cm:  Unweighted Mean = %7.5f +- %7.5f    RMS = %7.5f  (N = %2d)"%(mean_uw30, emean_uw30, sigma_uw30, int(sum0_uw30))

# Loop over the 2m measurements, only accepting measurements in the narrow range:
for i in range( len(L2m) ) :
    if (min2m < L2m[i] < max2m) :
        sum0_uw2m += 1.0
        sum1_uw2m += L2m[i]
        sum2_uw2m += L2m[i]*L2m[i]
        L2mGood.append(L2m[i])
        eL2mGood.append(eL2m[i])

mean_uw2m  = sum1_uw2m/sum0_uw2m                                  # Calculate the mean
sigma_uw2m = sqrt(sum2_uw2m/sum0_uw2m - mean_uw2m*mean_uw2m)      # Calculate the RMS
emean_uw2m = sigma_uw2m / sqrt(sum0_uw2m)                         # Uncertainty of the mean is RMS / sqrt(N)
print "  2m:    Unweighted Mean = %7.5f +- %7.5f    RMS = %7.5f  (N = %2d)"%(mean_uw2m, emean_uw2m, sigma_uw2m, int(sum0_uw2m))

# ----------------------------------------------------------------------------------- #
# Solution note:
# --------------
# The reduction of the uncertainty from the "naive" 138mm to 0.7mm is an improvement by
# a factor of about 190, which corresponds to 190*190 = 36100 times as many measurements!!!
# So if the naive uncertainty were to be decreased to the more clever (unweighted) one,
# it would require the entire grown-up Danish population to measure it!
#
# Regarding the inclusion of measurements with a +-30cm bias, I note that the uncertainty
# on the mean goes down from 0.75mm to 0.72mm, which means that they contribute. As the
# two means are very close, the additional measurements are probably OK. But it is also
# OK not to include them.
# ----------------------------------------------------------------------------------- #
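
# Side note - a minimal sketch of the scaling argument in the note above: since the
# error on a mean of N independent measurements scales as spread/sqrt(N), reaching a
# target uncertainty with single measurements of a given spread requires roughly
# N = (spread/target)^2 measurements. Here it is evaluated with the naive RMS and the
# unweighted error on the mean from this script (the exact number depends on the files read).
N_needed = (sigma / emean_uw30)**2
print "  30cm:  Naive measurements needed to reach the unweighted precision: roughly %9.0f"%(N_needed)
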

# ----------------------------------------------------------------------------------- #
# ----------------------------------------------------------------------------------- #
# Detailed data analysis for the weighted result - looking closer at the uncertainties:
# ----------------------------------------------------------------------------------- #
# ----------------------------------------------------------------------------------- #

# Investigate if the errors are correlated with the residual |value - mean|:
# ---------------------------------------------------------------------------

# To measure the correlation, we need to define several sums:
sum0_30  = 0.0
sumx_30  = 0.0
sumx2_30 = 0.0
sumy_30  = 0.0
sumy2_30 = 0.0
sumxy_30 = 0.0

Hist_30cm_RvsE = TH2F("Hist_30cm_RvsE", "Length residual vs. uncertainty estimates", 100, 0.0, 0.02, 100, 0.0, 0.05)

for i in range( len(L30cmGood) ) :
    residual = fabs(L30cmGood[i]-mean_uw30)
    # Check that the error and the residual are in a reasonable range
    # (the measurement itself is already in the accepted list):
    if (eL30cmGood[i] > 0.0 and eL30cmGood[i] < 0.05 and residual < 0.02) :
        sum0_30  += 1
        sumx_30  += residual
        sumx2_30 += residual*residual
        sumy_30  += eL30cmGood[i]
        sumy2_30 += eL30cmGood[i]*eL30cmGood[i]
        sumxy_30 += residual*eL30cmGood[i]
        Hist_30cm_RvsE.Fill(residual, eL30cmGood[i])     # 2D (scatter) plot!

# Calculate the linear correlation coefficient:
meanx_30  = sumx_30 / sum0_30
sigmax_30 = sqrt(sumx2_30/sum0_30 - meanx_30*meanx_30)
meany_30  = sumy_30 / sum0_30
sigmay_30 = sqrt(sumy2_30/sum0_30 - meany_30*meany_30)
rhoxy_30  = (sumxy_30/sum0_30 - meanx_30*meany_30) / (sigmax_30 * sigmay_30)
print "  30cm:  Mean x: %6.4f   RMS x: %6.4f   Mean y: %6.4f   RMS y: %6.4f   rho(xy): %6.3f"%(meanx_30, sigmax_30, meany_30, sigmay_30, rhoxy_30)

# Repeat for the 2m folding rule:
# -------------------------------
sum0_2m  = 0.0
sumx_2m  = 0.0
sumx2_2m = 0.0
sumy_2m  = 0.0
sumy2_2m = 0.0
sumxy_2m = 0.0

Hist_2m_RvsE = TH2F("Hist_2m_RvsE", "Length residual vs. uncertainty estimates", 100, 0.0, 0.02, 100, 0.0, 0.05)
for i in range( len(L2mGood) ) :
    residual = fabs(L2mGood[i]-mean_uw2m)
    if (eL2mGood[i] > 0.0 and eL2mGood[i] < 0.05 and residual < 0.02) :
        sum0_2m  += 1
        sumx_2m  += residual
        sumx2_2m += residual*residual
        sumy_2m  += eL2mGood[i]
        sumy2_2m += eL2mGood[i]*eL2mGood[i]
        sumxy_2m += residual*eL2mGood[i]
        Hist_2m_RvsE.Fill(residual, eL2mGood[i])

# Calculate the linear correlation coefficient:
meanx_2m  = sumx_2m / sum0_2m
sigmax_2m = sqrt(sumx2_2m/sum0_2m - meanx_2m*meanx_2m)
meany_2m  = sumy_2m / sum0_2m
sigmay_2m = sqrt(sumy2_2m/sum0_2m - meany_2m*meany_2m)
rhoxy_2m  = (sumxy_2m/sum0_2m - meanx_2m*meany_2m) / (sigmax_2m * sigmay_2m)
print "  2m:    Mean x: %6.4f   RMS x: %6.4f   Mean y: %6.4f   RMS y: %6.4f   rho(xy): %6.3f"%(meanx_2m, sigmax_2m, meany_2m, sigmay_2m, rhoxy_2m)

# Draw the uncertainties vs. the residuals:
canvas2 = TCanvas( "canvas2", "canvas2", 100, 100, 1200, 600 )
canvas2.Divide(2,1)

canvas2.cd(1)
Hist_30cm_RvsE.GetXaxis().SetRangeUser(0.0, 0.02)        # From the histogram, we get the x-axis, and set the range.
Hist_30cm_RvsE.GetXaxis().SetTitle("Residual (m)")       # We give the x-axis a label.
Hist_30cm_RvsE.GetYaxis().SetTitle("Uncertainty (m)")    # We give the y-axis a label.
Hist_30cm_RvsE.SetMarkerColor(kBlue)                     # Set the marker color to blue.
Hist_30cm_RvsE.SetMarkerStyle(20)                        # Set the marker style to full circles.
Hist_30cm_RvsE.Draw()                                    # Draw the histogram.

canvas2.cd(2)
Hist_2m_RvsE.GetXaxis().SetRangeUser(0.0, 0.01)          # From the histogram, we get the x-axis, and set the range.
Hist_2m_RvsE.GetXaxis().SetTitle("Residual (m)")         # We give the x-axis a label.
Hist_2m_RvsE.GetYaxis().SetTitle("Uncertainty (m)")      # We give the y-axis a label.
Hist_2m_RvsE.SetMarkerColor(kBlue)                       # Set the marker color to blue.
Hist_2m_RvsE.SetMarkerStyle(20)                          # Set the marker style to full circles.
Hist_2m_RvsE.Draw()                                      # Draw the histogram.

if (SavePlots) :
    canvas2.SaveAs("Correlations_RvsE.png")              # Save plot (the format follows the extension name).

# ----------------------------------------------------------------------------------- #
# Solution note:
# --------------
# The purpose of the question about correlation was mainly to ensure that you tried to
# calculate this yourself! Note, however, that even perfect errors would not give a very
# high correlation coefficient. Still, it gives some hint of whether the uncertainties
# given are worth anything or not. And it is always VERY healthy to see a plot of the
# distributions, to check for anything strange.
#
# The real test of whether the uncertainties are reliable is obtained by making a
# so-called pull plot, i.e. plotting the distribution of the pull, pull = (x_i - mean) / sigma_i.
# If the errors are credible, it should be a unit Gaussian (think about why!).
# ----------------------------------------------------------------------------------- #
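
# Side note - a toy illustration of the claim above (toy numbers, not the table data):
# if measurements scatter around the true value with exactly their quoted uncertainty,
# the pulls come out with mean ~0 and width ~1. A "true" length of 3.363m and a 7mm
# quoted uncertainty are assumed purely for illustration; gRandom is ROOT's global
# random number generator.
Ntoy      = 1000
sigma_toy = 0.007
pulls_toy = []
for itoy in range(Ntoy) :
    x_toy = gRandom.Gaus(3.363, sigma_toy)            # Toy measurement with the assumed spread.
    pulls_toy.append((x_toy - 3.363) / sigma_toy)     # Pull, using the (correct) quoted uncertainty.
mean_ptoy = sum(pulls_toy) / Ntoy
rms_ptoy  = sqrt(sum(p*p for p in pulls_toy) / Ntoy - mean_ptoy*mean_ptoy)
print "  Toy check:  pull mean = %5.2f   pull width = %5.2f   (expect ~0 and ~1)"%(mean_ptoy, rms_ptoy)
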
# Pull distributions:
Hist_30cm_Pull = TH1F("Hist_30cm_Pull", "Pull distribution - 30cm", 100, -10.0, 10.0)
for i in range( len(L30cmGood) ) :
    if (eL30cmGood[i] > 0.0) :
        pull = (L30cmGood[i]-mean_uw30) / eL30cmGood[i]
        if (fabs(pull) > 5.0) :     # Give a warning, if any points are more than 5 sigma away!
            print "  30cm:  Warning! Large pull:  measurement = %6.4f +- %6.4f    pull = %6.4f"%(L30cmGood[i], eL30cmGood[i], pull)
        Hist_30cm_Pull.Fill(pull)

Hist_2m_Pull = TH1F("Hist_2m_Pull", "Pull distribution - 2m", 100, -10.0, 10.0)
for i in range( len(L2mGood) ) :
    if (eL2mGood[i] > 0.0) :
        pull = (L2mGood[i]-mean_uw2m) / eL2mGood[i]
        if (fabs(pull) > 5.0) :     # Give a warning, if any points are more than 5 sigma away!
            print "  2m:    Warning! Large pull:  measurement = %6.4f +- %6.4f    pull = %6.4f"%(L2mGood[i], eL2mGood[i], pull)
        Hist_2m_Pull.Fill(pull)

canvas3 = TCanvas( "canvas3", "canvas3", 150, 150, 1200, 600 )
canvas3.Divide(2,1)

canvas3.cd(1)
Hist_30cm_Pull.GetXaxis().SetRangeUser(-10.0, 10.0)      # From the histogram, we get the x-axis, and set the range.
Hist_30cm_Pull.GetXaxis().SetTitle("Pull")               # We give the x-axis a label.
Hist_30cm_Pull.GetYaxis().SetTitle("Frequency")          # We give the y-axis a label.
Hist_30cm_Pull.SetMarkerColor(kBlue)                     # Set the marker color to blue.
Hist_30cm_Pull.SetMarkerStyle(20)                        # Set the marker style to full circles.
fit_pull30 = TF1("fit_pull30", "gaus", -4.0, 4.0)        # Define a (predefined) Gaussian fitting function.
fit_pull30.SetLineColor(kRed)                            # Set the line color to red.
fit_pull30.SetLineWidth(2)                               # Set the line width to 2.
Hist_30cm_Pull.Fit("fit_pull30", "RL")                   # Note the two options: L for Likelihood and R for Range.
Hist_30cm_Pull.Draw()                                    # Draw the histogram.

canvas3.cd(2)
Hist_2m_Pull.GetXaxis().SetRangeUser(-10.0, 10.0)        # From the histogram, we get the x-axis, and set the range.
Hist_2m_Pull.GetXaxis().SetTitle("Pull")                 # We give the x-axis a label.
Hist_2m_Pull.GetYaxis().SetTitle("Frequency")            # We give the y-axis a label.
Hist_2m_Pull.SetMarkerColor(kBlue)                       # Set the marker color to blue.
Hist_2m_Pull.SetMarkerStyle(20)                          # Set the marker style to full circles.
fit_pull2m = TF1("fit_pull2m", "gaus", -4.0, 4.0)        # Define a (predefined) Gaussian fitting function.
fit_pull2m.SetLineColor(kRed)                            # Set the line color to red.
fit_pull2m.SetLineWidth(2)                               # Set the line width to 2.
Hist_2m_Pull.Fit("fit_pull2m", "RL")                     # Note the two options: L for Likelihood and R for Range.
Hist_2m_Pull.Draw()                                      # Draw the histogram.

if (SavePlots) :
    canvas3.SaveAs("Distribution_Pull.png")              # Save plot (the format follows the extension name).

# ----------------------------------------------------------------------------------- #
# Solution note:
# --------------
# The pull distributions show that the given uncertainties are somewhat too large (the
# sigma of the Gaussian fit is less than 1.0, so one is dividing by too large errors!),
# but not by very much. So the errors given by those who did a good measurement actually
# look reasonably trustworthy, except that there are large peaks around zero, which come
# from serious overestimates of the uncertainty.
#
# Also, the pull distribution shows that one of the 30cm measurements is more than
# 8 sigma away from the mean. This measurement I will discard, as either the measurement
# has an error, or its uncertainty is much underestimated.
# ----------------------------------------------------------------------------------- #

# Calculate the weighted means:
# ---------------------------------------------------------------------------

# Define sums for the weighted results (note that "Nw30" is only for counting the
# number of measurements used, and is not used in the calculations):
Nw30 = 0
sum0_w30 = 0.0
sum1_w30 = 0.0

# Also check what the minimum and maximum uncertainties are (and possibly enlarge them, if not trustworthy!):
emin30 = 999.9
emax30 = 0.0

for i in range( len(L30cmGood) ) :
    if (eL30cmGood[i] > 0.0) :
        pull = (L30cmGood[i]-mean_uw30) / eL30cmGood[i]
        if (fabs(pull) < 5.0) :
            Nw30 += 1
            elowerlimit = max(0.002, eL30cmGood[i])     # Set the minimum possible error to 2mm (Python's built-in max)!
            sum0_w30 += 1.0 / (elowerlimit*elowerlimit)
            sum1_w30 += L30cmGood[i] / (elowerlimit*elowerlimit)
            if (eL30cmGood[i] < emin30) : emin30 = eL30cmGood[i]
            if (eL30cmGood[i] > emax30) : emax30 = eL30cmGood[i]

# Calculate the weighted mean (and check the error range):
mean_w30  = sum1_w30 / sum0_w30
emean_w30 = sqrt(1.0 / sum0_w30) * fit_pull30.GetParameter(2)    # I multiply by the pull width to make up for
                                                                 # the general overestimate of the uncertainties.
print "  30cm:  Weighted mean = %7.5f +- %7.5f  (N = %3d)"%(mean_w30, emean_w30, Nw30)
print "  30cm:  Smallest error is: %7.5f   Is that realistic?"%emin30
print "  30cm:  Largest error is:  %7.5f   Is that realistic?"%emax30

# Repeat for the 2m results:
# --------------------------
Nw2m = 0
sum0_w2m = 0.0
sum1_w2m = 0.0
emin2m = 999.9
emax2m = 0.0

for i in range( len(L2mGood) ) :
    if (eL2mGood[i] > 0.0) :
        pull = (L2mGood[i]-mean_uw2m) / eL2mGood[i]
        if (fabs(pull) < 5.0) :
            Nw2m += 1
            elowerlimit = max(0.001, eL2mGood[i])       # Set the minimum possible error to 1mm!
            sum0_w2m += 1.0 / (elowerlimit*elowerlimit)
            sum1_w2m += L2mGood[i] / (elowerlimit*elowerlimit)
            if (eL2mGood[i] < emin2m) : emin2m = eL2mGood[i]
            if (eL2mGood[i] > emax2m) : emax2m = eL2mGood[i]

# Calculate the weighted mean (and check the error range):
mean_w2m  = sum1_w2m / sum0_w2m
emean_w2m = sqrt(1.0 / sum0_w2m) * fit_pull2m.GetParameter(2)    # I multiply by the pull width to make up for
                                                                 # the general overestimate of the uncertainties.
print "  2m:    Weighted mean = %7.5f +- %7.5f  (N = %3d)"%(mean_w2m, emean_w2m, Nw2m)
print "  2m:    Smallest error is: %7.5f   Is that realistic?"%emin2m
print "  2m:    Largest error is:  %7.5f   Is that realistic?"%emax2m

# ----------------------------------------------------------------------------------- #
# Solution note:
# --------------
# The weighted means make use of the uncertainties, and further decrease the error
# on the final mean. In order for the uncertainties used to be realistic, I scale
# them by the width of the pull distribution (think about why!).
#
# The final result is indeed very accurate. In order to check if the two results are
# consistent, we consider their difference and its uncertainty, both in the unweighted
# and the weighted case.
# ----------------------------------------------------------------------------------- #
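
# Side note - a minimal sketch of why scaling by the pull width works (toy numbers, not
# the table data): if every quoted uncertainty is a common factor c too large, the pull
# width comes out as 1/c, while the "raw" weighted error sqrt(1/sum(1/sigma_i^2)) is a
# factor c too large - so their product recovers the correct error on the mean.
c_toy      = 2.0                                      # Assume all errors are overestimated by a factor 2.
strue_toy  = 0.003                                    # Assume a true 3mm single-measurement spread.
Nmeas_toy  = 100
quoted_toy = c_toy * strue_toy                        # The (too large) quoted uncertainty.
pullw_toy  = strue_toy / quoted_toy                   # Pull width = 1/c.
eraw_toy   = quoted_toy / sqrt(Nmeas_toy)             # Raw weighted error for N equal weights.
print "  Toy check:  raw weighted error = %6.4f   scaled by pull width = %6.4f   true error = %6.4f"%(eraw_toy, eraw_toy*pullw_toy, strue_toy/sqrt(Nmeas_toy))
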

def sqr(a) :
    return a*a

diff_uw  = mean_uw30 - mean_uw2m
ediff_uw = sqrt(sqr(emean_uw30) + sqr(emean_uw2m))
print "  The difference between the unweighted results is:  diff = %7.5f +- %7.5f  (%3.1f sigma)"%(diff_uw, ediff_uw, fabs(diff_uw/ediff_uw))

diff_w  = mean_w30 - mean_w2m
ediff_w = sqrt(sqr(emean_w30) + sqr(emean_w2m))
print "  The difference between the weighted results is:    diff = %7.5f +- %7.5f  (%3.1f sigma)"%(diff_w, ediff_w, fabs(diff_w/ediff_w))

# ----------------------------------------------------------------------------------- #
# Solution note:
# --------------
# The results agree remarkably well (I suspected it would be worse!), and thus we can
# actually have some confidence in the very small uncertainties.
#
# The completely final result is of course a weighted average of the two!
# ----------------------------------------------------------------------------------- #
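
# A minimal sketch of that final combination - a standard 1/sigma^2 weighted average of
# the two weighted results, assuming they are independent (the names below are only
# introduced for this combination):
w30 = 1.0 / sqr(emean_w30)
w2m = 1.0 / sqr(emean_w2m)
mean_final  = (w30*mean_w30 + w2m*mean_w2m) / (w30 + w2m)
emean_final = sqrt(1.0 / (w30 + w2m))
print "  Combined (30cm & 2m) weighted result:  L = %7.5f +- %7.5f m"%(mean_final, emean_final)
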
raw_input('Press Enter to exit')


# ----------------------------------------------------------------------------------- #
#
# Start by taking a close look at the data, first by inspecting the numbers in the data
# file, and then by considering the histograms produced by running the macro.
#
# To begin with, only consider the 30cm ruler measurements from 2013, and disregard the
# estimated/guessed uncertainties. You can then expand from there, as guided by the
# questions below.
#
# Questions:
# ----------
# 1) Consider the calculated mean and width, and make sure that you understand how
#    they were obtained. Is the result as you would expect it? And do you think that
#    it is close to the best possible (i.e. most precise) result?
#
# 2) Do any of the measurements look wrong/bad/suspicious? Do you see any repeated
#    mistakes? Would you correct or exclude any of the measurements, and how would
#    you justify this?
#    If so, make sure that your program does this, and recalculate the result (with sums).
#    Also, fit the length measurements with a Gaussian distribution. Should the binning
#    be changed for these fits? And is the Gaussian distribution justified?
#
# 3) Write the number of measurements you accepted for the length determination, along
#    with your result (mean and error on the mean), into the questionnaire.
#
# You should at least reach this point in the exercise! And preferably much further!!!
# Of the questions below, you should perhaps focus mostly on 5, 6 and 7.
#
# 4) (Slightly difficult question)
#    Is there any correlation between the errors on the measurements and the actual
#    distance to the mean (this is called the "residual")? That is, do the errors
#    contain useful information, which should be used in the calculation of the most
#    accurate estimate of the mean?
#    In all simplicity - did those who had a measurement close to the mean also put down
#    a small uncertainty? What is the correlation between the residuals and the
#    uncertainties? Yes, define new sums as discussed in the lectures, and calculate
#    the correlation!
#
# 5) Compare the size of the residuals with the size of the uncertainties. Were people
#    too optimistic or too pessimistic (or were the uncertainty estimates/guesses good)?
#
# 6) Whether or not you found a large enough correlation above, try to calculate the
#    weighted mean. Do you need to discard more dubious measurements? Did the result
#    improve? Do you trust this result more?
#
# 7) Repeat the above questions for the 2m folding rule.
#
# 8) How much did the single measurement uncertainty go down from the 30cm ruler case
#    to the 2m folding rule? Can you quantify how much better the 2m folding rule is?
#
#
# Advanced questions:
# -------------------
# 1) Does the length of the table seem to be different when measured with a 30cm and a
#    2m ruler? Quantify this statement! I.e. what is the difference, and what is the
#    uncertainty on that difference? Is this significantly away from 0? How big an
#    effect on the 30cm ruler would this correspond to?
#
# 2) If you were asked for the best estimate of the length of the table, what would
#    you do? (If possible, read Bevington page 58 bottom!)
#
#
# ----------------------------------------------------------------------------------- #