#!/usr/bin/env python

# ----------------------------------------------------------------------------------- #
#
#  Python/ROOT macro for exercise on calibration.
#
#  You're developping a new method for measuring the distance to stars, and want to
#  calibrate and improve this method, such that the precision obtained is unbiased and
#  has a minimal variance. You know that the method depends on several factors, such as:
#   * Amount of signal light from the star (lsig)
#   * Amount of background light in the surrounding sky (lbkg)
#   * Temperature of star (temp)
#   * Transparency of sky (tsky)
#
#  In order to determine the influence of these factors, and how much you need to correct
#  for each of them, you consider 10.000 stars with known distances (measured by another
#  method). From these, you can find how well your own method works, make corrections to
#  biases as needed, and finally find out how precise your calibrated method is. Happy
#  calibration.
#
#  Author: Troels C. Petersen (NBI)
#  Email:  petersen@nbi.dk
#  Date:   1st of October 2014
#
# ----------------------------------------------------------------------------------- #

from ROOT import *
from array import array

# Setting what to be shown in statistics box:
gStyle.SetOptStat("emr")
gStyle.SetOptFit(1111)

verbose = True
Nverbose = 10

SaveFigures = false


# ----------------------------------------------------------------------------------- #
def sqr(a) : return a*a
# ----------------------------------------------------------------------------------- #


# ----------------------------------------------------------------------------------- #
# Define files, lists, counters and read data:
# ----------------------------------------------------------------------------------- #

file = "data_calib.txt"
dknown = []
dmeas  = []
lsig   = []
lbkg   = []
temp   = []
tsky   = []

Nread = 0

# Loop over files and open them
with open( file, 'r' ) as f : 

    # Loop over lines and extract variables of interest
    for line in f:
        line = line.strip()
        columns = line.split()
        if (len(columns) == 6) :
            
            # Put the values read into lists:
            dknown.append(float(columns[0]))
            dmeas.append(float(columns[1]))
            lsig.append(float(columns[2]))
            lbkg.append(float(columns[3]))
            temp.append(float(columns[4]))
            tsky.append(float(columns[5]))
            if (verbose and Nread < Nverbose) :
                print "  Distance (known) = %8.2f    Distance (measured) = %6.3f "%(dknown[-1], dmeas[-1])
            Nread += 1

f.close()


# ------------------------------------------------------------------ #
# Make Histograms and vectors:
# ------------------------------------------------------------------ #

Hist_Raw   = TH1F("Hist_Raw",   "Hist_Raw",   80, -2.0, 2.0)
Hist_Calib = TH1F("Hist_Calib", "Hist_Calib", 80, -2.0, 2.0)

# Example histograms of "Luminosity Signal (lsig)" distribution
Hist_lsig   = TH1F("Hist_lsig",   "Hist_lsig",   50, 0.0, 50.0)
Hist_lsig2D = TH2F("Hist_lsig2D", "Hist_lsig2D", 50, 0.0, 50.0,  50, -1.0, 1.0)


# ------------------------------------------------------------------ #
# Loop over data and make plots:
# ------------------------------------------------------------------ #

for i in range (len(dknown)) :

    # Note the initial relative resolution:
    distrel = (dmeas[i] - dknown[i]) / dknown[i]
    Hist_Raw.Fill(distrel)

    # Do calibration (expand this yourself!):
    dmeas_calib = dmeas[i]    # Make corrections that gives a better resolution here...

    # Note the final relative resolution:
    distrel_calib = (dmeas_calib - dknown[i]) / dknown[i]
    Hist_Calib.Fill(distrel_calib)

    # Histograms for making calibration:
    Hist_lsig.Fill(lsig[i])
    Hist_lsig2D.Fill(lsig[i], distrel)


print "  The initial and final resolutions are:  %6.3f  and  %6.3f \n"%(Hist_Raw.GetRMS(), Hist_Calib.GetRMS())


# ------------------------------------------------------------------ #
# Show the distribution of fitting results:
# ------------------------------------------------------------------ #

c0 = TCanvas("c0","", 20, 50, 600, 400)
    
Hist_Raw.GetXaxis().SetTitle("Realitive precision (dmeas - dknown) / dknown")
Hist_Raw.GetYaxis().SetTitle("Frequency")
Hist_Raw.SetLineWidth(2)
Hist_Raw.SetLineColor(2)
Hist_Raw.Draw()
Hist_Calib.SetLineWidth(2)
Hist_Calib.SetLineColor(4)
Hist_Calib.Draw("same")

c0.Update()
if (SaveFigures) : c0.SaveAs("UncalibratedCalibrated.png")


# ------------------------------------------------------------------ #
# Show the distribution of fitting results:
# ------------------------------------------------------------------ #

c1 = TCanvas("c1","", 120, 100, 600, 400)
    
Hist_lsig2D.GetXaxis().SetTitle("Signal light from star (lsig)")
Hist_lsig2D.GetYaxis().SetTitle("Realitive precision (dmeas - dknown) / dknown")
Hist_lsig2D.GetXaxis().SetRangeUser(0.0, 50.0)
Hist_lsig2D.Draw()
ProfX_lsig = Hist_lsig2D.ProfileX("ProfX_lsig")
ProfX_lsig.SetLineWidth(2)
ProfX_lsig.SetLineColor(2)
ProfX_lsig.Draw("same")

c1.Update()


raw_input( ' ... Press enter to exit ... ' )

# ----------------------------------------------------------------------------------- #
# 
#  As always look at the data and get a feel for each of the variables. A good idea might
#  be to plot them all to know what range to expect them in.
# 
#  Next, consider the distribution of relative differences between the observed (by your
#  method!) and actual distance: (dist_obs - dist_known) / dist_known
#  The RMS is 0.27, i.e. a 27% precision, and it is neither centered at zero (as it
#  should not to be biased), nor very Gaussian. This is what you want to improve upon!
# 
#  Finally, there is the distribution of the bias and relative precision as a function of
#  the signal luminosity (lsig). As you can see, the response does not depend on lsig,
#  and so there seems to be no (varying) bias from this variable. Check the other three
#  variables, and if you find some bias, try to correct for it, such that you get the
#  relative difference to be the most narrow Gaussian centered around 0.
# 
# 
#  Questions:
#  ----------
#   1) What corrections do you apply for each of the variables, and how much do each of
#      them improve on the result?
#   2) Try at the end to figure out, which variables the final resolution depends on.
#      Can you give an estimate of the uncertainty for each single star?
# 
# ----------------------------------------------------------------------------------- #