#!/usr/bin/env python
# ----------------------------------------------------------------------------------- #
#
# Python program for analysing measurements of the length of the lecture table in
# Auditorium A at NBI.
#
# There are two measurements each with estimated error of the table length:
#   1: Measurement with a 30cm ruler.
#   2: Measurement with a 2m folding ruler.
#
# Each person was asked not only to state the measurement, but also their estimated
# uncertainty. None of the persons could see others measurements in order to get the
# largest degree of independence. Also, the 30cm ruler measurement was asked to be
# done first.
#
#   Author: Troels C. Petersen (NBI)
#   Email:  petersen@nbi.dk
#   Date:   10th of December 2018
#
# ----------------------------------------------------------------------------------- #
#
# EXAMPLE of analysis to yield SOLUTION!!!
# ----------------------------------------------------------------------------------- #

from ROOT import *
import os, math

# Set the showing of statistics and fitting results (0 means off, 1 means on):
gStyle.SetOptStat("emr");    # See http://root.cern.ch/root/html/TStyle.html#TStyle:SetOptStat
gStyle.SetOptFit(1111);      # See http://root.cern.ch/root/html/TStyle.html#TStyle:SetOptFit

# Options of program (switches steering the analysis below):
Verbose = True                    # Print every measurement read and every rejection made.
Blinded = False                   # Add a hidden random offset to all quoted means.
Correct30cmOff = True             # Shift 30cm-ruler measurements that are off by +-30cm.
Correct2mOff = True               # Shift 2m-rule measurements that are off by +-2m.
ApplyChauvenetsCriterion = True   # Use Chauvenet's Criterion (else a simple Nsigma cut).
SavePlots = False                 # Write the canvases to PDF files.

# Recorded results (for reference, obtained with the settings above):
#
# Naive results:
#   30cm: Naive Mean = 3.3290 +- 0.0098   RMS = 0.2172  (N = 495)
#   2m:   Naive Mean = 3.3320 +- 0.0112   RMS = 0.2480  (N = 493)
#   Correspondence between raw 30cm and 2m measurement: Diff = -0.00297 +- 0.01484  (-0.20 sigma)
#
# FINAL: With correction (p_cut = 0.05 in Chauvenet's Criterion):
#   30cm: Unweighted Mean = 3.36120 +- 0.00092   RMS = 0.01960  (N = 454)
#   2m:   Unweighted Mean = 3.36295 +- 0.00026   RMS = 0.00542  (N = 430)
#   Improvement in error from naive to selective, unweighted - 30cm: 10.6
#   Improvement in error from naive to selective, unweighted - 2m:   42.7
#   Correspondence between unweighted 30cm and 2m measurement: Diff = -0.00175 +- 0.00130  (-1.35 sigma)
#   30cm: Weighted Mean = 3.36376 +- 0.00030   p(Chi2=460.5, Ndof=442) = 0.2622  (N = 443)
#   2m:   Weighted Mean = 3.36385 +- 0.00014   p(Chi2=451.1, Ndof=431) = 0.2426  (N = 432)
#   Improvement in error from selected unweighted to weighted - 30cm: 3.1
#   Improvement in error from selected unweighted to weighted - 2m:   1.8
#   Correspondence between weighted 30cm and 2m measurement: Diff = -0.00009 +- 0.00033  (-0.27 sigma)
#
#   Correspondence between unweighted and weighted 30cm measurement: Diff = 0.00257 +- 0.00097  (2.65 sigma)
#   Correspondence between unweighted and weighted 2m measurement:   Diff = 0.00090 +- 0.00030  (3.04 sigma)
#
# Fitting results:
#   Single Gaussian fit:                L = 3.36332 +- 0.00058 m
#   Double Gaussian fit:                L = 3.36418 +- 0.00041 m
#   3 x Double Gaussian:                L = 3.36337 +- 0.00053 m
#   3 x Triple Gaussian (outlier) fit:  L = 3.36428 +- 0.00042 m
#
# OTHER settings:
# With correction (p_cut = 0.01 in Chauvenet's Criterion):
#   30cm: Unweighted Mean = 3.36320 +- 0.00128   RMS = 0.02791  (N = 476)
#   2m:   Unweighted Mean = 3.36279 +- 0.00028   RMS = 0.00590  (N = 435)
#   The number of rejected 30cm measurements was: 19
#   The number of rejected 2m measurements was:   58
#   30cm: Weighted Mean = 3.36444 +- 0.00025   p(Chi2=446.1, Ndof=443) = 0.4497  (N = 444)
#   2m:   Weighted Mean = 3.36385 +- 0.00014   p(Chi2=451.1, Ndof=431) = 0.2426  (N = 432)
#
# With correction (p_cut = 0.30 in Chauvenet's Criterion):
#   30cm: Unweighted Mean = 3.36319 +- 0.00060   RMS = 0.01225  (N = 421)
#   2m:   Unweighted Mean = 3.36279 +- 0.00028   RMS = 0.00590  (N = 435)
#   The number of rejected 30cm measurements was: 74
#   The number of rejected 2m measurements was:   58
#   30cm: Weighted Mean = 3.36444 +- 0.00025   p(Chi2=446.1, Ndof=443) = 0.4497  (N = 444)
#   2m:   Weighted Mean = 3.36385 +- 0.00014   p(Chi2=451.1, Ndof=431) = 0.2426  (N = 432)
#
# No correction (p_cut = 0.30 in Chauvenet's Criterion):
#   30cm: Unweighted Mean = 3.36257 +- 0.00057   RMS = 0.01089  (N = 368)
#   2m:   Unweighted Mean = 3.36272 +- 0.00028   RMS = 0.00588  (N = 430)
#   The number of rejected 30cm measurements was: 127
#   The number of rejected 2m measurements was:    63
#   30cm: Weighted Mean = 3.36388 +- 0.00031   p(Chi2=516.2, Ndof=403) = 0.0001  (N = 404)
#   2m:   Weighted Mean = 3.36371 +- 0.00015   p(Chi2=431.9, Ndof=427) = 0.4251  (N = 428)
# ----------------------------------------------------------------------------------- #

def sqr(a) :
    """Return the square of a."""
    return a*a


# ----------------------------------------------------------------------------------- #
# Calculate mean and RMS from a list:
# ----------------------------------------------------------------------------------- #
def MeanRMS( inlist ) :
    """Return [mean, rms] of the numbers in inlist.

    The RMS uses the unbiased N-1 denominator. Error cases (not a list, empty
    list, single entry) print a warning and return the sentinel value
    -9999999999.9 for any quantity that could not be computed (original interface).
    """
    if not isinstance(inlist, list) :
        print("Error: Input to MeanRMS is NOT a list!")
        return [-9999999999.9, -99999999999.9]
    if len( inlist ) == 0 :
        print("Error: Input list is empty! No calculation of mean and RMS possible.")
        return [-9999999999.9, -99999999999.9]
    elif len( inlist ) == 1 :
        print("Error: Input list has length one! No calculation of RMS possible.")
        return [inlist[0], -99999999999.9]

    # Mean:
    mu = sum(inlist) / float(len(inlist))

    # Standard deviation (RMS), with the unbiased N-1 denominator:
    rms = math.sqrt( sum((entry-mu)*(entry-mu) for entry in inlist) / float(len(inlist) - 1) )

    return [mu, rms]       # Return a list with two values: [mean, rms]


# ----------------------------------------------------------------------------------- #
# Apply Chauvenet's Criterion to a list and return a (possibly) reduced list:
# ----------------------------------------------------------------------------------- #
# Note: Peirce's criterion (though not documented there) has a Python implementation at:
#       https://en.wikipedia.org/wiki/Peirce%27s_criterion
def ChauvenetsCriterion( inlist, pmin = 0.1) :
    """Iteratively reject the furthest outlier from inlist.

    At each iteration the measurement furthest from the current mean is found,
    and the probability of observing at least one point that far out among
    N Gaussian measurements is computed. If that probability is below pmin the
    point is removed and mean/RMS recomputed; otherwise iteration stops.
    Returns the reduced list; the input list is never modified.
    """
    if not isinstance(inlist, list) :
        print("Error: Input to ChauvenetsCriterion is NOT a list!")
        return []       # FIX: return an (empty) list of measurements, not [mean, rms]-style sentinels.

    outlist = list(inlist)       # Work on a copy - the caller's list stays intact.
    meanrms = MeanRMS(outlist)   # Calculation of initial mean and rms.

    # Iterate until the furthest outlier is probable enough (p_any_outliers > pmin):
    while True :

        # Find the furthest outlier, i.e. most distant measurement from the mean (least probable), and its index:
        ifurthest = 0
        dLfurthest = 0.0
        for number, entry in enumerate( outlist ) :
            if (abs(entry - meanrms[0]) > dLfurthest) :
                ifurthest = number                      # Note the index, so that this entry can later be removed!
                dLfurthest = abs(entry - meanrms[0])

        # Probability of any such outlier (taking into account that there are many measurements!):
        Nsigma_outlier = dLfurthest / meanrms[1]
        p_this_outlier = math.erfc(Nsigma_outlier / math.sqrt(2.0)) / 2.0   # One-sided Gaussian tail (same as TMath.Erfc/2).
        p_any_outliers = 1.0 - (1.0 - p_this_outlier)**int(len(outlist))

        status = "  %3d:  L=%5.3f  dL=%5.3f  Nsig=%5.2f  p1=%10.8f,  p_all=%10.8f >? pmin=%5.3f   N=%3d  mean=%6.4f  rms=%6.4f"%(ifurthest, outlist[ifurthest], dLfurthest, Nsigma_outlier, p_this_outlier, p_any_outliers, pmin, len(outlist), meanrms[0], meanrms[1])

        # Key line: If the furthest outlier is probable enough, then stop rejecting points:
        if (p_any_outliers > pmin) :
            if (Verbose) :
                print(status + "  -> Accepted")
            break

        # Remove the furthest point from the list of accepted measurements (if any are left!),
        # recalculate mean and RMS, and reiterate:
        if (len(outlist) > 1) :
            if (Verbose) :
                print(status + "  -> Rejected")
            outlist.pop(ifurthest)
            meanrms = MeanRMS(outlist)
        else :
            print("\n  ERROR: All measurements have been rejected!")
break return outlist # ----------------------------------------------------------------------------------- # # Initial data analysis - look at the data and get "naive" values: # ----------------------------------------------------------------------------------- # # Define two histograms with all the lengths recorded: Hist_L30cm = TH1F("Hist_L30cm", "Lengths estimates by 30cm ruler", 1000, 0.0, 5.0); Hist_L2m = TH1F("Hist_L2m", "Lengths estimates by 2m folding rule", 5000, 0.0, 5.0); Lblind = 0.0 r = TRandom3() r.SetSeed(1) if (Blinded) : Lblind = r.Gaus(0.0, 0.1) # I add a constant (Gaussian with +-10cm) to remain "blind" # Define lists to contain the measurements, such that you can easily loop over them later! L30cm = [] eL30cm = [] L2m = [] eL2m = [] Nread = 0 # Number of measurements read in total infiles = ["data_TableMeasurements2009.txt", "data_TableMeasurements2010.txt", "data_TableMeasurements2011.txt", "data_TableMeasurements2012.txt", "data_TableMeasurements2013.txt", "data_TableMeasurements2014.txt", "data_TableMeasurements2015.txt", "data_TableMeasurements2016.txt", "data_TableMeasurements2017.txt", "data_TableMeasurements2018.txt"] # Loop over files and open them for infile in infiles : with open( infile, 'r' ) as f : # Read and ignore header lines: header1 = f.readline() header2 = f.readline() # Loop over lines and extract variables of interest for line in f: line = line.strip() columns = line.split() if (len(columns) == 4) : # Put the values read into a list: L30cm.append(float(columns[0])) eL30cm.append(float(columns[1])) L2m.append(float(columns[2])) eL2m.append(float(columns[3])) # Histograms to show the distribution of ALL measurements (i.e. 
0-5 meters): Hist_L30cm.Fill(L30cm[-1]) Hist_L2m.Fill(L2m[-1]) # Count number of lines read, and print result for visual inspection: Nread += 1 if (Verbose) : print " Measurement %3d: L(30cmR): %6.3f +- %6.3f m L(2mFR): %6.3f +- %6.3f m"%(Nread, L30cm[-1], eL30cm[-1], L2m[-1], eL2m[-1]) f.close() # ----------------------------------------------------------------------------------- # # Plot histograms on the screen in one canvas and fit with Gaussians: # ----------------------------------------------------------------------------------- # canvas = TCanvas( "canvas", "canvas", 50, 50, 1200, 600 ) canvas.Divide(1,2) minL = 0.0 maxL = 5.0 # Set all the trimmings for the histogram plotting: canvas.cd(1) Hist_L30cm.GetXaxis().SetRangeUser(minL, maxL) # From the histogram, we get the x-axis, and set the range. Hist_L30cm.GetXaxis().SetTitle("Table length (m)") # We give the x-axis a label. Hist_L30cm.GetYaxis().SetTitle("Frequency") # We give the y-axis a label. Hist_L30cm.SetLineColor(kBlue) # Set the histogram line color to blue. Hist_L30cm.SetLineWidth(2) # Set the histogram line width to two. # Define a resonable fitting function: fit_L30cm = TF1("fit_L30cm", "gaus", 3.32, 3.40) # Define a (predefined) Gaussian fitting function. fit_L30cm.SetParameters(50.0, 3.36, 0.008) # Set the starting parameters to reasonable values. fit_L30cm.SetLineColor(kRed) # Set the line color to red. fit_L30cm.SetLineWidth(2) # Set tne line width to 2. Hist_L30cm.Fit("fit_L30cm", "QR") # Note the two options: Q for Quiet and R for Range. Hist_L30cm.Draw() # Draw the histogram canvas.cd(2) Hist_L2m.GetXaxis().SetRangeUser(minL, maxL) # From the histogram, we get the x-axis, and set the range. Hist_L2m.GetXaxis().SetTitle("Table length (m)") # We give the x-axis a label. Hist_L2m.GetYaxis().SetTitle("Frequency") # We give the y-axis a label. Hist_L2m.SetLineColor(kBlue) # Set the histogram line color to blue. Hist_L2m.SetLineWidth(2) # Set the histogram line width to two. 
# Define a resonable fitting function: fit_L2m = TF1("fit_L2m", "gaus", 3.34, 3.38) # Define a (predefined) Gaussian fitting function. fit_L2m.SetParameters(50.0, 3.36, 0.0025) # Set the starting parameters to reasonable values. fit_L2m.SetLineColor(kRed) # Set the line color to red. fit_L2m.SetLineWidth(2) # Set tne line width to 2. Hist_L2m.Fit("fit_L2m", "QR") # Note the two options: Q for Quiet and R for Range. Hist_L2m.Draw() canvas.Update() if (SavePlots) : canvas.SaveAs("TableMeasurements_RawFitted.pdf") # Save plot (format follow extension name) # Compute approximate width of central peak, to get an estimate of the uncertainty on (good) measurements: mean30_peakfit = fit_L30cm.GetParameter(1) rms30_peakfit = fit_L30cm.GetParameter(2) mean2m_peakfit = fit_L2m.GetParameter(1) rms2m_peakfit = fit_L2m.GetParameter(2) # ----------------------------------------------------------------------------------- # # Solution note: # -------------- # These fits show, that the true value lies around the range 3.362-3.364, and that the # uncertainty of "a typical but correct" measurement is about 8mm for the 30cm ruler, # and 3.0mm for the 2m folding rule. # # In the 30cm case, I note that there are also a significant amount of measurements # +-30cm from this range. This is clearly measurements, where the number of 30cm rulers # has been off by one. As this is an understood problem, I may therefore decide to also # include these measurements, but this depends on these contributing to the overall # precision. # # In the 2m case, there are three measurements which are 2m short, and these could of # course also be included. 
# ----------------------------------------------------------------------------------- # # ------------------------------------------------------------------------- # Make a list with all VALID measurements and compute "naive" mean and RMS: # ------------------------------------------------------------------------- # I define "valid" as in the range 0.0m < measurement < 5.0m: L30cm_valid = [] for entry in L30cm : if (0.0 < entry < 5.0) : L30cm_valid.append(entry) else : print " The 30cm entry L = %6.3f was not considered valid!"%(entry) meanrms30 = MeanRMS(L30cm_valid) mean30 = meanrms30[0] rms30 = meanrms30[1] emean30 = meanrms30[1] / sqrt(len(L30cm_valid)) L2m_valid = [] for entry in L2m : if (0.0 < entry < 5.0) : L2m_valid.append(entry) else : print " The 2m entry L = %6.3f was not considered valid!"%(entry) meanrms2m = MeanRMS(L2m_valid) mean2m = meanrms2m[0] rms2m = meanrms2m[1] emean2m = meanrms2m[1] / sqrt(len(L2m_valid)) # This is the "naive" mean, in that it simply uses all measurements without any inspection. print "\n --------------------------- NAIVE/RAW results ---------------------------------" print " 30cm: Naive Mean = %6.4f +- %6.4f RMS = %6.4f (N = %2d)"%(mean30+Lblind, emean30, rms30, int(len(L30cm_valid))) print " 2m: Naive Mean = %6.4f +- %6.4f RMS = %6.4f (N = %2d)"%(mean2m+Lblind, emean2m, rms2m, int(len(L2m_valid))) # Also compute the difference between the 30cm and 2m separately combined measurements: diff_raw = mean30 - mean2m ediff_raw = sqrt(sqr(emean30) + sqr(emean2m)) nsig_raw = diff_raw / ediff_raw print " Correspondence between raw 30cm and 2m measurement: Diff = %7.5f +- %7.5f (%4.2f sigma)"%(diff_raw, ediff_raw, nsig_raw) # ----------------------------------------------------------------------------------- # # Apply correction to measurements (by +-30cm and/or +-2m), if I choose to do so. 
# For correction, use means found above, and simply shift everything 15-45cm away # by 30cm, leaving it to subsequent analysis, if point is to be included or not. Ncorr30cm = 0 Ncorr2m = 0 L30cm_corrected = [] for entry in L30cm_valid : if (Correct30cmOff and mean30-0.45 < entry < mean30-0.15) : L30cm_corrected.append(entry+0.30) Ncorr30cm += 1 elif (Correct30cmOff and mean30+0.15 < entry < mean30+0.45) : L30cm_corrected.append(entry-0.30) Ncorr30cm += 1 else : L30cm_corrected.append(entry) L2m_corrected = [] for entry in L2m_valid : if (Correct2mOff and mean2m-3.0 < entry < mean2m-1.0) : L2m_corrected.append(entry+2.0) Ncorr2m += 1 elif (Correct2mOff and mean2m+3.0 < entry < mean2m+1.0) : L2m_corrected.append(entry-2.0) Ncorr2m += 1 else : L2m_corrected.append(entry) # Print the number of corrected measurements: print "\n Number of CORRECTED measurements:" print " 30cm: Ncorr = %3d"%(Ncorr30cm) print " 3m: Ncorr = %3d"%(Ncorr2m) # ----------------------------------------------------------------------------------- # # Apply Chauvenet's Criterion or simply only accept measurements around some interval: # ----------------------------------------------------------------------------------- # # Define a list of accepted measurements for unweighted mean, following either Chauvenet's Criterion # or a simpler selection (IMPORTANT NOTE: ALWAYS PRINT REJECTED VALUES TO KEEP CONTROL!): # First define lists (empty for now) of accepted measurements for the unweighted mean: L30cm_accepted_uw = [] L2m_accepted_uw = [] if (Verbose) : print "\n The following measurements are deemed poor and removed by..." if (ApplyChauvenetsCriterion) : print "\n --------------------------- Chauvenet's Criterion ---------------------------------" # NOTE: The last number is the cut on p-value at which we accept the outermost point. 
if (Verbose) : print "Rejected 30cm measurements:" L30cm_accepted_uw = ChauvenetsCriterion( L30cm_corrected, 0.05) # L30cm_accepted_uw = ChauvenetsCriterion( L30cm_corrected, 0.3) # To be used if no corrections are done! if (Verbose) : print "Rejected 2m measurements:" L2m_accepted_uw = ChauvenetsCriterion( L2m_corrected, 0.05) # ----------------------------------------------------------------------------------- # # Solution note: # -------------- # Applying Chauvenet's Criterion for the 30cm ruler measurements, has the "problem", that # it is not very good at dealing with the two peaks at +-30cm! If these are to be removed, # then a lot of reasonably good measurements will also be removed. # The solution is to apply the correction to data, i.e. deem all measurements around +-30cm # away from the peak as clearly a result of miscounting, and adjust them by 30cm. This is # done in the interval (15cm-45cm), which may seem excessive, but if a measurement is 20cm # away, it will in the corrected version still be 10cm away... thus still a poor measurement! # # However, the choice of correcting or not, and what value of the probability to choose for # discarding measurements is by no means given, and the result of trying various approaches, # which is why one should certainly MAKE SURE that one investigates how much the final # result depends on these choices (which are then a systematic uncertainty!). # ----------------------------------------------------------------------------------- # else : # Cut at a "reasonable" value, i.e. one that would not discart many/any points, if the # distribution was nicely Gaussian. Here, I take that to be 4 sigma, which has a probability # of p = 0.000063 (two-sided), thus with 286 numbers, the chance of one being outside is # roughly p*N = 0.018, or about 2%. So I would rarely reject a good measurement. I obtain the # width (i.e. estimated uncertainty on the individual measurements) by fitting the central peak # by a Gaussian. 
Nsigma_cut = 4.0 print " --------------------------- Nsigma=%4.2f cut away from core peak ---------------------------"%(Nsigma_cut) # 30cm case: L30cm_accepted_uw = [] Lmin = mean30_peakfit - Nsigma_cut*rms30_peakfit Lmax = mean30_peakfit + Nsigma_cut*rms30_peakfit print " 30cm: Given: mean = %6.4f, rms = %6.4f, the allowed range is: %6.4f - %6.4f"%(mean30_peakfit, rms30_peakfit, Lmin, Lmax) for number, entry in enumerate( L30cm_corrected ) : Nsigma = (entry - mean30_peakfit) / rms30_peakfit if (abs(Nsigma) < Nsigma_cut) : L30cm_accepted_uw.append(entry) else : if (Verbose) : print " %3d: %5.3f Nsig=%6.2f N=%3d -> Rejected"%(number, abs(entry - mean30_peakfit), Nsigma, len(L30cm_accepted_uw)) # 2m case: L2m_accepted_uw = [] Lmin = mean2m_peakfit - Nsigma_cut*rms2m_peakfit Lmax = mean2m_peakfit + Nsigma_cut*rms2m_peakfit print " 2m: Given: mean = %6.4f, rms = %6.4f, the allowed range is: %6.4f - %6.4f"%(mean2m_peakfit, rms2m_peakfit, Lmin, Lmax) for number, entry in enumerate( L2m_corrected ) : Nsigma = (entry - mean2m_peakfit) / rms2m_peakfit if (abs(Nsigma) < Nsigma_cut) : L2m_accepted_uw.append(entry) else : if (Verbose) : print " %3d: %5.3f Nsig=%6.2f N=%3d -> Rejected"%(number, abs(entry - mean2m_peakfit), Nsigma, len(L2m_accepted_uw)) # At the very least note, how many points were rejected: print "The number of rejected 30cm measurements was: %3d"%(len(L30cm_corrected) - len(L30cm_accepted_uw)) print "The number of rejected 2m measurements was: %3d"%(len(L2m_corrected) - len(L2m_accepted_uw)) # ----------------------------------------------------------------------------------- # # Plot the distribution of the remaining (accepted) measurements: # ----------------------------------------------------------------------------------- # HistAcc_L30cm = TH1F("HistAcc_L30cm", "Accepted lengths estimates by 30cm ruler", 100, 3.0, 3.5); HistAcc_L2m = TH1F("HistAcc_L2m", "Accepted lengths estimates by 2m folding rule", 200, 3.0, 3.5); for entry in L30cm_accepted_uw : 
HistAcc_L30cm.Fill(entry) for entry in L2m_accepted_uw : HistAcc_L2m.Fill(entry) canvas2 = TCanvas( "canvas2", "canvas2", 100, 100, 1200, 600 ) canvas2.Divide(1,2) minL = 3.3 maxL = 3.5 canvas2.cd(1) canvas2.SetLogy() HistAcc_L30cm.GetXaxis().SetRangeUser(minL, maxL) # From the histogram, we get the x-axis, and set the range. HistAcc_L30cm.GetXaxis().SetTitle("Table length (m)") # We give the x-axis a label. HistAcc_L30cm.GetYaxis().SetTitle("Frequency") # We give the y-axis a label. HistAcc_L30cm.SetLineColor(kBlue) # Set the histogram line color to blue. HistAcc_L30cm.SetLineWidth(2) # Set the histogram line width to two. # Define a resonable fitting function: fitAcc_L30cm = TF1("fitAcc_L30cm", "gaus", 3.33, 3.39)# Define a (predefined) Gaussian fitting function. fitAcc_L30cm.SetLineColor(kRed) # Set the line color to red. fitAcc_L30cm.SetLineWidth(2) # Set tne line width to 2. HistAcc_L30cm.Fit("fitAcc_L30cm", "QRL") # Note the two options: Q for Quiet and R for Range. HistAcc_L30cm.Draw() # Draw the histogram canvas2.cd(2) canvas2.SetLogy() HistAcc_L2m.GetXaxis().SetRangeUser(minL, maxL) # From the histogram, we get the x-axis, and set the range. HistAcc_L2m.GetXaxis().SetTitle("Table length (m)") # We give the x-axis a label. HistAcc_L2m.GetYaxis().SetTitle("Frequency") # We give the y-axis a label. HistAcc_L2m.SetLineColor(kBlue) # Set the histogram line color to blue. HistAcc_L2m.SetLineWidth(2) # Set the histogram line width to two. # Define a resonable fitting function: fitAcc_L2m = TF1("fitAcc_L2m", "gaus", 3.34, 3.38) # Define a (predefined) Gaussian fitting function. fitAcc_L2m.SetLineColor(kRed) # Set the line color to red. fitAcc_L2m.SetLineWidth(2) # Set tne line width to 2. HistAcc_L2m.Fit("fitAcc_L2m", "QRL") # Note the two options: Q for Quiet and R for Range. 
HistAcc_L2m.Draw() canvas2.Update() if (SavePlots) : canvas2.SaveAs("TableMeasurements_Accepted.pdf") # ----------------------------------------------------------------------------------- # # Data analysis (mostly done!) and results for unweighted result: # ----------------------------------------------------------------------------------- # print "\n --------------------------- UNWEIGHTED results ---------------------------------" # Get the mean, RMS and error on mean from the list of accepted measurements - 30cm: meanrms30 = MeanRMS(L30cm_accepted_uw) mean_uw30 = meanrms30[0] rms_uw30 = meanrms30[1] emean_uw30 = meanrms30[1] / sqrt(len(L30cm_accepted_uw)) print " 30cm: Unweighted Mean = %7.5f +- %7.5f RMS = %7.5f (N = %3d)"%(mean_uw30+Lblind, emean_uw30, rms_uw30, len(L30cm_accepted_uw)) # Get the mean, RMS and error on mean from the list of accepted measurements - 2m: meanrms2m = MeanRMS(L2m_accepted_uw) mean_uw2m = meanrms2m[0] rms_uw2m = meanrms2m[1] emean_uw2m = meanrms2m[1] / sqrt(len(L2m_accepted_uw)) print " 2m: Unweighted Mean = %7.5f +- %7.5f RMS = %7.5f (N = %3d)"%(mean_uw2m+Lblind, emean_uw2m, rms_uw2m, len(L2m_accepted_uw)) print " Improvement in error from naive to selective, unweighted - 30cm: %4.1f"%(emean30 / emean_uw30) print " Improvement in error from naive to selective, unweighted - 2m: %4.1f"%(emean2m / emean_uw2m) diff_uw = mean_uw30 - mean_uw2m ediff_uw = sqrt(sqr(emean_uw30) + sqr(emean_uw30)) nsig_uw = diff_uw / ediff_uw print " Correspondence between unweighted 30cm and 2m measurement: Diff = %7.5f +- %7.5f (%4.2f sigma)"%(diff_uw, ediff_uw, nsig_uw) # ----------------------------------------------------------------------------------- # # Solution note: # -------------- # The reduction of the uncertainty from the "naive" 11mm and 14mm to 0.61mm and 0.32mm # is an improvement by a factor of about 20 and 50, for the 30cm and 2m cases respectively, # This corresponds to 20*20 = 400 and 50*50 = 2500 as many measurements!!! 
# So if the naive uncertainty was to be decreased to the more clever (unweighted) one, # it would in the 2m case require the entire adult population of Copenhagen to measure it! # # Regarding the inclusion of measurements with +-30cm and +-2m bias, I note that the # uncertainty on the mean does not decrease from including these measurements, and I # therefore decide NOT to include them. Note that this conclusion can be done, even if # the central values are blinded! # # Finally, despite the very small uncertainties, the two results are still in very good # agreement with each other. # ----------------------------------------------------------------------------------- # # ----------------------------------------------------------------------------------- # # ----------------------------------------------------------------------------------- # # Detailed data analysis for weighted result - looking closer at the uncertainties: # ----------------------------------------------------------------------------------- # # ----------------------------------------------------------------------------------- # print "\n ------------------------ PULL - Cleaning weighted events ------------------------------" # ------------------------------------------------------------------------- # Make a list with all VALID and CORRECTED measurements for weighted case: # ------------------------------------------------------------------------- # First we have to "remake" the list of valid and corrected measurements, as this takes # on a new definition in the weighted case, in that we now also have to consider the # uncertainties (sometimes given as -1, i.e. not included). # One could possibly also require the uncertainties to be above some absolute minimum, # which I choose to apply for just one measurement (2m claiming 0.1mm uncertainty!). 
L30cm_corrected_w = [] eL30cm_corrected_w = [] for i, entry in enumerate(L30cm) : if (0.0 < entry < 5.0 and eL30cm[i] > 0.0) : if (Correct30cmOff and mean_uw30-0.45 < entry < mean_uw30-0.15) : L30cm_corrected_w.append(entry+0.30) elif (Correct30cmOff and mean_uw30+0.15 < entry < mean_uw30+0.45) : L30cm_corrected_w.append(entry-0.30) else : L30cm_corrected_w.append(entry) if (eL30cm[i] < 0.0005) : eL30cm_corrected_w.append(0.001) # Set minimum uncertainty to 0.5mm else : eL30cm_corrected_w.append(eL30cm[i]) L2m_corrected_w = [] eL2m_corrected_w = [] for i, entry in enumerate(L2m) : if (0.0 < entry < 5.0 and eL2m[i] > 0.0) : if (Correct2mOff and mean_uw30-3.0 < entry < mean_uw30-1.0) : L2m_corrected_w.append(entry+2.0) elif (Correct2mOff and mean_uw30+3.0 < entry < mean_uw30+1.0) : L2m_corrected_w.append(entry-2.0) else : L2m_corrected_w.append(entry) if (eL2m[i] < 0.00025) : eL2m_corrected_w.append(0.00025) # Set minimum uncertainty to 0.25mm else : eL2m_corrected_w.append(eL2m[i]) # ----------------------------------------------------------------------------------- # # Solution note: # -------------- # In order to consider a weighted result, we need to inspect the uncertainties, and # see if they make sense and if they carry any additional information. In principle # we could investigate, if the errors are correlated with the residual |value - mean|. # # However note that even perfect errors would not have a correlation coefficient very # high! The real test of whether the uncertainties are reliable, is obtained by making # a so-called pull plot, which means plotting the pull = (x_i - mean) / sigma_i # distribution. # If the errors are credible, it should be a unit Gaussian (think about why!). And if # measurements are too far away (in terms of their error), then they are not very # credible. 
# ----------------------------------------------------------------------------------- #

# Maximum number of sigma's we'll allow a measurement to be away from the (approximate) mean:
max_pull = 4.5

L30cm_accepted_w = []
eL30cm_accepted_w = []
L2m_accepted_w = []
eL2m_accepted_w = []

# Produce the pull distributions (and ensure that the errors are valid!):
pull_range = 12.0
Hist_30cm_Pull = TH1F("Hist_30cm_Pull", "Pull distribution - 30cm", 120, -pull_range, pull_range);
Hist_2m_Pull = TH1F("Hist_2m_Pull", "Pull distribution - 2m", 120, -pull_range, pull_range);

# 30cm case - fill pulls and accept only measurements within max_pull of the unweighted mean:
for i, entry in enumerate(L30cm_corrected_w) :
    pull = (entry-mean_uw30) / eL30cm_corrected_w[i]
    Hist_30cm_Pull.Fill(pull)
    if (abs(pull) < max_pull) :
        L30cm_accepted_w.append(entry)
        eL30cm_accepted_w.append(eL30cm_corrected_w[i])
    else :
        # Give warning, if any points are more than max_pull (= 4.5) sigma away!
        if (Verbose) :
            print("  30cm: Warning! Large pull: L = %6.4f +- %6.4f  pull = %6.2f"%(L30cm_corrected_w[i], eL30cm_corrected_w[i], pull))

# 2m case:
for i, entry in enumerate(L2m_corrected_w) :
    pull = (entry-mean_uw2m) / eL2m_corrected_w[i]
    Hist_2m_Pull.Fill(pull)
    if (abs(pull) < max_pull) :
        L2m_accepted_w.append(entry)
        eL2m_accepted_w.append(eL2m_corrected_w[i])
    else :
        if (Verbose) :
            print("  2m: Warning! Large pull: L = %6.4f +- %6.4f  pull = %6.2f"%(L2m_corrected_w[i], eL2m_corrected_w[i], pull))

# At the very least note, how many points were rejected:
print("The number of rejected 30cm measurements (weighted) was: %3d"%(len(L30cm_corrected_w) - len(L30cm_accepted_w)))
print("The number of rejected 2m measurements (weighted) was: %3d"%(len(L2m_corrected_w) - len(L2m_accepted_w)))


# Plot the pull distributions:
# --------------------------------------------------------------------
canvas3 = TCanvas( "canvas3", "canvas3", 150, 150, 1200, 600 )
canvas3.Divide(2,1)

canvas3.cd(1)
Hist_30cm_Pull.GetXaxis().SetRangeUser(-11.0, 11.0)   # From the histogram, we get the x-axis, and set the range.
Hist_30cm_Pull.GetXaxis().SetTitle("Pull")            # We give the x-axis a label.
Hist_30cm_Pull.GetYaxis().SetTitle("Frequency")       # We give the y-axis a label.
Hist_30cm_Pull.SetMarkerColor(kBlue)
Hist_30cm_Pull.SetMarkerStyle(20)

# Single Gaussian fit to the pull (should be close to a unit Gaussian, if errors are credible):
fit_pull30 = TF1("fit_pull30", "gaus", -max_pull, max_pull)
fit_pull30.SetLineColor(kRed)
fit_pull30.SetLineWidth(2)
Hist_30cm_Pull.Fit("fit_pull30", "LRQ")               # L: likelihood fit, R: range, Q: quiet.

pi = 3.141592653    # NOTE(review): math.pi would be preferable; kept numeric to reproduce the original fit exactly.
binwidth = 0.2      # Bin width of the pull histograms (2*12.0/120); used to normalize G2 to histogram counts.

def G2(x, p) :
    """Double Gaussian with a COMMON mean p[2]: p[0] = total normalisation,
    p[1] = fraction in the first (core) Gaussian, p[3]/p[4] = the two widths."""
    norm1 = 1.0 / math.sqrt(2.0*pi) / p[3]
    z1 = (x[0] - p[2]) / p[3]
    norm2 = 1.0 / math.sqrt(2.0*pi) / p[4]
    z2 = (x[0] - p[2]) / p[4]
    return p[0] * binwidth * (p[1] * norm1 * math.exp(-0.5*z1*z1) + (1.0-p[1]) * norm2 * math.exp(-0.5*z2*z2))

# Double Gaussian fit (core + wider tail component), drawn on top of the single one:
fit2_pull30 = TF1("fit2_pull30", G2, -max_pull, max_pull, 5)    # Last argument: number of fit parameters.
fit2_pull30.SetParameters(100, 0.5, 0.0, 0.5, 1.5)
fit2_pull30.SetLineColor(kMagenta)
fit2_pull30.SetLineWidth(2)
Hist_30cm_Pull.Fit("fit2_pull30", "LR+") # Note options: L for Likelihood, R for Range, Q for Quiet Hist_30cm_Pull.Draw() # Draw the histogram canvas3.cd(2) Hist_2m_Pull.GetXaxis().SetRangeUser(-11.0, 11.0) # From the histogram, we get the x-axis, and set the range. Hist_2m_Pull.GetXaxis().SetTitle("Pull") # We give the x-axis a label. Hist_2m_Pull.GetYaxis().SetTitle("Frequency") # We give the y-axis a label. Hist_2m_Pull.SetMarkerColor(kBlue) # Set the histogram line color to blue. Hist_2m_Pull.SetMarkerStyle(20) # Set the histogram line width to two. fit_pull2m = TF1("fit_pull2m", "gaus", -max_pull, max_pull) # Define a (predefined) Gaussian fitting function. fit_pull2m.SetLineColor(kRed) # Set the line color to red. fit_pull2m.SetLineWidth(2) # Set tne line width to 2. Hist_2m_Pull.Fit("fit_pull2m", "LRQ") # Note options: L for Likelihood, R for Range, Q for Quiet fit2_pull2m = TF1("fit_pull2m", G2, -max_pull, max_pull, 5) # Define a (predefined) Gaussian fitting function. fit2_pull2m.SetParameters(100, 0.5, 0.0, 0.5, 1.5) fit2_pull2m.SetLineColor(kMagenta) # Set the line color to red. fit2_pull2m.SetLineWidth(2) # Set tne line width to 2. Hist_2m_Pull.Fit("fit_pull2m", "LR+") # Note options: L for Likelihood, R for Range, Q for Quiet Hist_2m_Pull.Draw() # Draw the histogram canvas3.Update() if (SavePlots) : canvas3.SaveAs("Distribution_Pull.pdf") # ----------------------------------------------------------------------------------- # # Solution note: # -------------- # The pull distributions show, that the uncertainties given are reasonable in size # (sigma in the Gaussian fit is about 1.0, so one is dividing by reasonable errors!). # So the errors given by those who did a good measurement actually look reasonably # trustworthy, except that there are large peaks around zero, which are from serious # overestimates of the uncertainty (also giving room for too small errors). There is # thus no guarentee, that using the uncertainties will improve the result. 
# ----------------------------------------------------------------------------------- # print "\n --------------------------- WEIGHTED results ---------------------------------" # Calculate the weighted for 30cm ruler case: # ------------------------------------------- sum0_w30 = 0.0 sum1_w30 = 0.0 for i, entry in enumerate ( L30cm_accepted_w ) : sum0_w30 += 1.0 / sqr(eL30cm_accepted_w[i]) sum1_w30 += entry / sqr(eL30cm_accepted_w[i]) mean_w30 = sum1_w30 / sum0_w30 emean_w30 = sqrt(1.0 / sum0_w30) # If the pull was "bad", I would multiply by the pull width, # to make up for general (under-) overestimates of uncertainties. # Given that we now have the mean, calculate the Chi2: chi2_w30 = 0.0 for i, entry in enumerate ( L30cm_accepted_w ) : chi2_w30 += sqr((entry - mean_w30) / eL30cm_accepted_w[i]) print " 30cm: Weighted Mean = %7.5f +- %7.5f p(Chi2=%5.1f, Ndof=%3d) = %6.4f (N = %3d)"%(mean_w30+Lblind, emean_w30, chi2_w30, len(L30cm_accepted_w)-1, TMath.Prob(chi2_w30, len(L30cm_accepted_w)-1), len(L30cm_accepted_w)) # Calculate the weighted for 2m ruler case: # ----------------------------------------- sum0_w2m = 0.0 sum1_w2m = 0.0 for i, entry in enumerate ( L2m_accepted_w ) : sum0_w2m += 1.0 / sqr(eL2m_accepted_w[i]) sum1_w2m += entry / sqr(eL2m_accepted_w[i]) mean_w2m = sum1_w2m / sum0_w2m emean_w2m = sqrt(1.0 / sum0_w2m) # If the pull was "bad", I would multiply by the pull width, # to make up for general (under-) overestimates of uncertainties. 
# Given that we now have the mean, calculate the Chi2: chi2_w2m = 0.0 for i, entry in enumerate ( L2m_accepted_w ) : chi2_w2m += sqr((entry - mean_w2m) / eL2m_accepted_w[i]) print " 2m: Weighted Mean = %7.5f +- %7.5f p(Chi2=%5.1f, Ndof=%3d) = %6.4f (N = %3d)"%(mean_w2m+Lblind, emean_w2m, chi2_w2m, len(L2m_accepted_w)-1, TMath.Prob(chi2_w2m, len(L2m_accepted_w)-1), len(L2m_accepted_w)) # Again consider the improvements and relative difference: print " Improvement in error from selected unweighted to weighted - 30cm: %4.1f"%(emean_uw30 / emean_w30) print " Improvement in error from selected unweighted to weighted - 2m: %4.1f"%(emean_uw2m / emean_w2m) # ----------------------------------------------------------------------------------- # # Solution note: # -------------- # The weighted means make use of the uncertainties, and further decrease the error # on the final mean. In order for the uncertainties used to be realistic, one could # (as said) have scaled them by the width of the pull distribution (think about why!), # but that is not needed in this case, as they are very close to unity. # # The final results are indeed very accurate. As a cross check, we check if they are # consistent. # ----------------------------------------------------------------------------------- # diff_w = mean_w30 - mean_w2m ediff_w = sqrt(sqr(emean_w30) + sqr(emean_w2m)) nsig_w = diff_w / ediff_w print " Correspondence between weighted 30cm and 2m measurement: Diff = %7.5f +- %7.5f (%4.2f sigma)"%(diff_w, ediff_w, nsig_w) # ----------------------------------------------------------------------------------- # # Solution note: # -------------- # The results agree remarkably well (I suspected it would be worse!), and thus we can # actually have some confidence in the (very) small uncertainties. # # The completely final result is of course a weighted average of the two weighted # results! 
# # However, there might be a problem in that the result shifted significantly, when # moving from the unweighted to the weighted analysis!!! # ----------------------------------------------------------------------------------- # diff30_w_uw = mean_w30 - mean_uw30 ediff30_w_uw = sqrt(sqr(emean_w30) + sqr(emean_uw30)) nsig30_w_uw = diff30_w_uw / ediff30_w_uw print "\n Correspondence between unweighted and weighted 30cm measurement: Diff = %7.5f +- %7.5f (%4.2f sigma)"%(diff30_w_uw, ediff30_w_uw, nsig30_w_uw) diff2m_w_uw = mean_w2m - mean_uw2m ediff2m_w_uw = sqrt(sqr(emean_w2m) + sqr(emean_uw2m)) nsig2m_w_uw = diff2m_w_uw / ediff2m_w_uw print " Correspondence between unweighted and weighted 2m measurement: Diff = %7.5f +- %7.5f (%4.2f sigma)"%(diff2m_w_uw, ediff2m_w_uw, nsig2m_w_uw) # Correlation between (signed) residual and measurement weight (= 1/uncertainty^2): # ----------------------------------------------------------------------------------- # Hist_30cm_CorrResUnc = TH2F("Hist_30cm_CorrResUnc", "", 100, -0.1, 0.1, 60, 0.0, 1200000.0); Hist_2m_CorrResUnc = TH2F("Hist_2m_CorrResUnc", "", 100, -0.1, 0.1, 50, 0.0, 5000000.0); for i, entry in enumerate(L30cm_accepted_w) : Hist_30cm_CorrResUnc.Fill(entry-mean_uw30, 1.0/sqr(eL30cm_accepted_w[i])) for i, entry in enumerate(L2m_accepted_w) : Hist_2m_CorrResUnc.Fill(entry-mean_uw2m, 1.0/sqr(eL2m_accepted_w[i])) canvas4 = TCanvas( "canvas4", "canvas4", 180, 180, 1200, 600 ) canvas4.Divide(2,1) canvas4.cd(1) Hist_30cm_CorrResUnc.GetXaxis().SetRangeUser(-0.025, 0.025) Hist_30cm_CorrResUnc.GetXaxis().SetTitle("Residual") Hist_30cm_CorrResUnc.GetYaxis().SetTitle("Weight") Hist_30cm_CorrResUnc.SetMarkerColor(kBlue) Hist_30cm_CorrResUnc.SetMarkerStyle(20) Hist_30cm_CorrResUnc.Draw() canvas4.cd(2) Hist_2m_CorrResUnc.GetXaxis().SetRangeUser(-0.025, 0.025) Hist_2m_CorrResUnc.GetXaxis().SetTitle("Residual") Hist_2m_CorrResUnc.GetYaxis().SetTitle("Weight") Hist_2m_CorrResUnc.SetMarkerColor(kBlue) 
Hist_2m_CorrResUnc.SetMarkerStyle(20)
Hist_2m_CorrResUnc.Draw()

# Print the linear correlation coefficient between residual and weight:
print(" Correlation between residual and weight (30cm): %5.3f"%(Hist_30cm_CorrResUnc.GetCorrelationFactor()))
print(" Correlation between residual and weight (2m): %5.3f"%(Hist_2m_CorrResUnc.GetCorrelationFactor()))

canvas4.Update()
if (SavePlots) : canvas4.SaveAs("Correlation_ResidualUncertainty.pdf")

# ----------------------------------------------------------------------------------- #
# Solution note:
# --------------
# So, we do see a shift in the mean from the unweighted to the weighted result. It is
# right around that critical limit, where we start suspecting that it is a real effect
# and not just a statistical fluctuation.
# So, people who measure slightly longer tend to have a slightly smaller error. As the
# uncertainties are the result of partial guess work (and not delicate error estimation
# and propagation), I would here tend to think that this effect should at the very
# least be stated! But otherwise, the weighted result is usually the one to use.
# ----------------------------------------------------------------------------------- #

# ----------------------------------------------------------------------------------- #
# Fitting solution of RAW data:
# ----------------------------------------------------------------------------------- #

# ---------------------------------------- Fit 30cm case ---------------------------- #
canvas5 = TCanvas("canvas5", "canvas5", 250, 250, 1600, 1000)

minL = 3.0   # Lower edge of the plotted length range (m).
maxL = 3.7   # Upper edge of the plotted length range (m).

# Cosmetics for the raw 30cm length histogram:
Hist_L30cm.SetMaximum(110.0)                        # Leave headroom above the tallest bin.
Hist_L30cm.GetXaxis().SetRangeUser(minL, maxL)      # Zoom the x-axis to the chosen range.
Hist_L30cm.GetXaxis().SetTitle("Table length (m)")  # X-axis label.
Hist_L30cm.GetYaxis().SetTitle("Frequency")         # Y-axis label.
Hist_L30cm.SetLineColor(kBlue)                      # Blue histogram line.
Hist_L30cm.SetLineWidth(2)   # Histogram line width of two.
Hist_L30cm.Draw()            # Draw the histogram (the single-Gaussian fit is already attached).

# Legend for the 30cm fit plot (further fit entries are added as they are produced):
legend = TLegend(0.12, 0.68, 0.45, 0.88)
legend.SetLineColor(1)
legend.SetFillColor(0)
legend.AddEntry(Hist_L30cm, "Data", "L")
legend.AddEntry(fit_L30cm, "Simple Gaussian", "L")
legend.Draw()

canvas5.Update()
if (SavePlots) : canvas5.SaveAs("TableMeasurements_FinalFitAnalysis_30cm_1gauss.pdf")   # Format follows the extension.

binwidth = 0.005   # Bin width of the length histograms (re-used by the PDFs below).

# A more reasonable fitting function — double Gaussian with a common mean:
# ------------------------------------------------------------------------
def G2_30cm(x, p) :
    """Double Gaussian (shared mean) scaled to counts for the 30cm histogram.

    Parameters: p[0] = number of events, p[1] = core fraction, p[2] = mean,
    p[3] = core width, p[4] = wide width.
    """
    norm_core = 1.0 / sqrt(2.0*pi) / p[3]
    z_core = (x[0] - p[2]) / p[3]
    norm_wide = 1.0 / sqrt(2.0*pi) / p[4]
    z_wide = (x[0] - p[2]) / p[4]
    return p[0] * binwidth * (p[1] * norm_core * exp(-0.5 *z_core*z_core) + (1.0-p[1]) * norm_wide * exp(-0.5 *z_wide*z_wide))

fit2_L30cm = TF1("fit2_L30cm", G2_30cm, 3.32, 3.40, 5)
fit2_L30cm.SetParameters(500.0, 0.5, 3.36, 0.005, 0.018)   # Reasonable starting values.
fit2_L30cm.SetLineColor(kMagenta)
fit2_L30cm.SetLineWidth(2)
fit2_L30cm.SetNpx(1000)                                    # Draw the curve smoothly.
Hist_L30cm.Fit("fit2_L30cm", "R+")                         # R: range, +: keep earlier fits.
Hist_L30cm.Draw()   # Redraw with the double-Gaussian fit overlaid.

# Legend: add the new fit entry.
legend.AddEntry(fit2_L30cm, "Double Gaussian", "L")
legend.Draw()
canvas5.Update()
if (SavePlots) : canvas5.SaveAs("TableMeasurements_FinalFitAnalysis_30cm_2gauss.pdf")   # Format follows the extension.

# Expand the fitting function to include the two satellite peaks (one missed/extra
# 30cm ruler length, i.e. +-0.300 m) around the central one:
# ----------------------------------------------------------------------
def G2_3peak_30cm(x, p) :
    """Three double-Gaussian peaks at mean-0.3, mean, mean+0.3 (common widths).

    Parameters: p[0] = events in central peak, p[1] = core fraction,
    p[2] = central mean, p[3] = core width, p[4] = wide width,
    p[5] = events in low peak, p[6] = events in high peak.
    """
    # Given common widths, the normalisations are shared by all three peaks:
    norm1 = 1.0 / sqrt(2.0*pi) / p[3]
    norm2 = 1.0 / sqrt(2.0*pi) / p[4]
    # z-values for each peak — note the constant +-0.300 m shift, so only one mean is fitted:
    z1 = (x[0] - p[2]) / p[3]
    z2 = (x[0] - p[2]) / p[4]
    z1_low = ((x[0] + 0.300) - p[2]) / p[3]
    z2_low = ((x[0] + 0.300) - p[2]) / p[4]
    z1_high = ((x[0] - 0.300) - p[2]) / p[3]
    z2_high = ((x[0] - 0.300) - p[2]) / p[4]
    # Putting the three peaks together:
    G = p[1] * norm1 * exp(-0.5 *z1**2) + (1.0-p[1]) * norm2 * exp(-0.5 *z2**2)
    G_low = p[1] * norm1 * exp(-0.5 *z1_low**2) + (1.0-p[1]) * norm2 * exp(-0.5 *z2_low**2)
    G_high = p[1] * norm1 * exp(-0.5 *z1_high**2) + (1.0-p[1]) * norm2 * exp(-0.5 *z2_high**2)
    # Return the full expression, scaled to counts:
    return binwidth * (p[0] * G + p[5] * G_low + p[6] * G_high)

fit3_L30cm = TF1("fit3_L30cm", G2_3peak_30cm, 3.02, 3.70, 7)
fit3_L30cm.SetParameters(500.0, 0.5, 3.36, 0.003, 0.013, 20.0, 10.0)   # Reasonable starting values.
fit3_L30cm.SetLineColor(kGreen)
fit3_L30cm.SetLineWidth(2)
fit3_L30cm.SetNpx(1000)   # FIX (consistency): smooth drawing over the wide range, as for fit2/fit4.
Hist_L30cm.Fit("fit3_L30cm", "RL+")   # R: range, L: likelihood, +: keep earlier fits.
Hist_L30cm.Draw()   # Redraw with the three-peak fit overlaid.

# Legend: add the three-peak fit entry.
legend.AddEntry(fit3_L30cm, "Double Gaussian x 3", "L")
legend.Draw()
canvas5.Update()
if (SavePlots) : canvas5.SaveAs("TableMeasurements_FinalFitAnalysis_30cm_3_2gauss.pdf")   # Format follows the extension.

# Expand further: each of the three peaks gets a third, very wide Gaussian
# component (same mean) to soak up outliers:
# --------------------------------------------------------
def G2_3peak_outliers_30cm(x, p) :
    """Three triple-Gaussian peaks at mean-0.3, mean, mean+0.3.

    Parameters: p[0] = events in central peak, p[1] = core fraction,
    p[2] = central mean, p[3] = core width, p[4] = wide width,
    p[5] = events in low peak, p[6] = events in high peak,
    p[7] = outlier fraction, p[8] = outlier width.
    """
    # Common widths, so the normalisations are shared by all three peaks:
    norm1 = 1.0 / sqrt(2.0*pi) / p[3]
    norm2 = 1.0 / sqrt(2.0*pi) / p[4]
    norm_outl = 1.0 / sqrt(2.0*pi) / p[8]
    # z-values for each peak — constant +-0.300 m shifts, single fitted mean:
    z1 = (x[0] - p[2]) / p[3]
    z2 = (x[0] - p[2]) / p[4]
    z_outl = (x[0] - p[2]) / p[8]
    z1_low = ((x[0] + 0.300) - p[2]) / p[3]
    z2_low = ((x[0] + 0.300) - p[2]) / p[4]
    z_outl_low = ((x[0] + 0.300) - p[2]) / p[8]
    z1_high = ((x[0] - 0.300) - p[2]) / p[3]
    z2_high = ((x[0] - 0.300) - p[2]) / p[4]
    z_outl_high = ((x[0] - 0.300) - p[2]) / p[8]
    # Each peak: core + wide + outlier component, fractions summing to one:
    G = p[1] * norm1 * exp(-0.5 *z1**2) + (1.0-p[1]-p[7]) * norm2 * exp(-0.5 *z2**2) + p[7] * norm_outl * exp(-0.5 * z_outl**2)
    G_low = p[1] * norm1 * exp(-0.5 *z1_low**2) + (1.0-p[1]-p[7]) * norm2 * exp(-0.5 *z2_low**2) + p[7] * norm_outl * exp(-0.5 * z_outl_low**2)
    G_high = p[1] * norm1 * exp(-0.5 *z1_high**2) + (1.0-p[1]-p[7]) * norm2 * exp(-0.5 *z2_high**2) + p[7] * norm_outl * exp(-0.5 * z_outl_high**2)
    # (A flat PDF would be an alternative outlier model to the wide Gaussian.)
    # Return the full expression, scaled to counts:
    return binwidth * (p[0] * G + p[5] * G_low + p[6] * G_high)

fit4_L30cm = TF1("fit4_L30cm", G2_3peak_outliers_30cm, 3.02, 3.70, 9)
fit4_L30cm.SetParameters(500.0, 0.5, 3.36, 0.003, 0.013, 20.0, 10.0, 0.05, 0.10)   # Reasonable starting values.
# Name the parameters — self-documentation of the fit model:
fit4_L30cm.SetParNames("NevCentralPeak", "FracCore", "Mean", "WidthCore", "WidthWide", "NevLowPeak", "NevHighPeak", "FracOutliers", "WidthOutliers")
fit4_L30cm.SetLineColor(kBlack)
fit4_L30cm.SetLineWidth(2)
fit4_L30cm.SetNpx(1000)   # Draw the curve smoothly.
Hist_L30cm.Fit("fit4_L30cm", "RL+")   # R: range, L: likelihood, +: keep earlier fits.
Hist_L30cm.Draw()

# Rebuild the legend, now containing all four fit models:
legend = TLegend(0.12, 0.68, 0.45, 0.88)
legend.SetLineColor(1)
legend.SetFillColor(0)
legend.AddEntry(Hist_L30cm, "Data", "L")
legend.AddEntry(fit_L30cm, "Simple Gaussian", "L")
legend.AddEntry(fit2_L30cm, "Double Gaussian", "L")
legend.AddEntry(fit3_L30cm, "Double Gaussian x 3", "L")
legend.AddEntry(fit4_L30cm, "Triple Gaussian x 3", "L")
legend.Draw()
canvas5.Update()
if (SavePlots) : canvas5.SaveAs("TableMeasurements_FinalFitAnalysis_30cm.pdf")   # Format follows the extension.

# Summarise the fitted table lengths from the four 30cm models:
print(" Single Gaussian fit: L = %7.5f +- %7.5f m"%(fit_L30cm.GetParameter(1), fit_L30cm.GetParError(1)))
print(" Double Gaussian fit: L = %7.5f +- %7.5f m"%(fit2_L30cm.GetParameter(2), fit2_L30cm.GetParError(2)))
print(" 3 x Double Gaussian: L = %7.5f +- %7.5f m"%(fit3_L30cm.GetParameter(2), fit3_L30cm.GetParError(2)))
print(" 3 x Triple Gaussian (outlier) fit: L = %7.5f +- %7.5f m"%(fit4_L30cm.GetParameter(2), fit4_L30cm.GetParError(2)))

# ---------------------------------------- Fit 2m case ------------------------------ #
canvas6 = TCanvas("canvas6", "canvas6", 270, 270, 1600, 1000)

# Cosmetics for the raw 2m length histogram:
Hist_L2m.GetXaxis().SetRangeUser(minL, maxL)      # Zoom the x-axis to the chosen range.
Hist_L2m.GetXaxis().SetTitle("Table length (m)")  # X-axis label.
Hist_L2m.GetYaxis().SetTitle("Frequency")         # Y-axis label.
Hist_L2m.SetLineColor(kBlue)                      # Blue histogram line.
Hist_L2m.SetLineWidth(2)                          # Histogram line width of two.
binwidth = 0.005   # Bin width of the 2m length histogram (used by the PDF below).

# Reasonable fitting function for the 2m data — double Gaussian with a common mean:
def G2_2m(x, p) :
    """Double Gaussian (shared mean) scaled to counts for the 2m histogram.

    Parameters: p[0] = number of events, p[1] = core fraction, p[2] = mean,
    p[3] = core width, p[4] = wide width.
    """
    norm_core = 1.0 / sqrt(2.0*pi) / p[3]
    z_core = ((x[0]) - p[2]) / p[3]
    norm_wide = 1.0 / sqrt(2.0*pi) / p[4]
    z_wide = ((x[0]) - p[2]) / p[4]
    return p[0] * binwidth * (p[1] * norm_core * exp(-0.5 *z_core*z_core) + (1.0-p[1]) * norm_wide * exp(-0.5 *z_wide*z_wide))

fit3_L2m = TF1("fit3_L2m", G2_2m, 3.32, 3.40, 5)
fit3_L2m.SetParameters(50.0, 0.5, 3.36, 0.0025, 0.0075)   # Reasonable starting values.
fit3_L2m.SetLineColor(kMagenta)
fit3_L2m.SetLineWidth(2)
fit3_L2m.SetNpx(1000)                                     # Draw the curve smoothly.
Hist_L2m.Fit("fit3_L2m", "QR+")                           # Q: quiet, R: range, +: keep earlier fits.

Hist_L2m.GetXaxis().SetRangeUser(3.30, 3.42)   # Zoom in on the narrow 2m peak for the final plot.
Hist_L2m.Draw()
canvas6.Update()
if (SavePlots) : canvas6.SaveAs("TableMeasurements_FinalFitAnalysis_2m.pdf")   # Format follows the extension.

# One could extract the central-peak width as an estimate of the uncertainty on
# (good) individual measurements, e.g. via fit_L30cm/fit_L2m parameters 1 and 2.

raw_input('Press Enter to exit')