#!/usr/bin/env python

# ------------------------------------------------------------------------ #
#  Program for illustrating the use of sWeights from multidimensional fits.
# 
#  Author: Troels C. Petersen (NBI)
#  Email:  petersen@nbi.dk
#  Date:   14th of March 2016
# ------------------------------------------------------------------------ #

from ROOT import *
from array import array
import math
import sys
from numpy import matrix
from numpy import linalg

gStyle.SetOptStat("emr")
gStyle.SetOptFit(1111)

r = TRandom3()
r.SetSeed(1)

# Number of events in total:
Nsig =  5000
Nbkg = 10000

SavePlots = True
verbose = True
Nverbose = 10
RunFast = False    # Fix PDF shape parameters to truth values!

pi = TMath.Pi()


# ----------------------------------------------------------------------------------- #
# Functions:
# ----------------------------------------------------------------------------------- #
def sqr( a ) : return a*a


# Beta distribution: f(x) = (1 + a*x + b*x^2) / (2 + 2b/3) for x in [-1,1]
# ----------------------------------------------------------------------------------- #
def betadist(x,a,b) :
    if (abs(x) <= 1.0) : return (1.0 + a*x + b*x*x) / (2.0 + 2.0*b/3.0)
    else               : return 0.0

# Oscillation distribution: f(x) = 1 + A*cos(omega*x + phi) for x in [-1,1]
# ----------------------------------------------------------------------------------- #
def oscdist(x, A, omega, phi) :
    if (abs(A) <= 1.0) : return 1.0 + A*cos(omega*x + phi)
    else               : return 0.0


# Function for generating numbers according to beta distributions:
# ----------------------------------------------------------------------------------- #
def GetAngle( alpha, beta ) :
    if (alpha+beta > 1.0) :
        print "ERROR: alpha and/or beta not in defined range: alpha = %6.2f   beta = %6.2f"%(alpha,beta)
        return -999
    x = -1.0 + 2.0 * r.Uniform()
    y = (1.0+abs(alpha)+abs(beta)) * r.Uniform()
    while (y > betadist(x,alpha,beta)) :
        x = -1.0 + 2.0 * r.Uniform()
        y = (1.0+abs(alpha)+abs(beta)) * r.Uniform()
    return x

# Function for generating numbers according to oscillation distributions:
# ----------------------------------------------------------------------------------- #
def GetAngle2( A, omega, phi ) :
    if (abs(A) > 1.0) :
        print "ERROR: A not in defined range: A = %6.2f"%(A)
        return -999
    x = -1.0 + 2.0 * r.Uniform()
    y = (1.0+abs(A)) * r.Uniform()
    while (y > oscdist(x, A, omega, phi)) :
        x = -1.0 + 2.0 * r.Uniform()
        y = (1.0+abs(A)) * r.Uniform()
    return x


# ------------------------------------------------------------------------ #
def main() : 
# ------------------------------------------------------------------------ #

    outfile = open('output.txt', 'w')

    # Define lists and histograms (fast running):
    # x_sig = [ array('f', [0.0, 0.0, 0.0] ) for _ in xrange(Nsig) ]
    # x_bkg = [ array('f', [0.0, 0.0, 0.0] ) for _ in xrange(Nbkg) ]
    # x_all = [ array('f', [0.0, 0.0, 0.0] ) for _ in xrange(Nsig+Nbkg) ]
    # Define lists and histograms (fast coding):
    x_sig = []
    x_bkg = []
    x_all = []

    # Number of variables and definition of 1D histogram sizes:
    Nvar = 3
    Nbins = [ 100,  100,  100]
    xmin  = [ 0.0, -3.0, -1.0]
    xmax  = [ 2.0,  2.0,  1.0]
    VarName = ["Mass", "Shape", "Angle"]
    hist_sig = [TH1D("hist_sig_" + VarName[ivar], ";" + VarName[ivar] + ";Frequency", Nbins[ivar], xmin[ivar], xmax[ivar]) for ivar in xrange(Nvar)]
    hist_bkg = [TH1D("hist_bkg_" + VarName[ivar], ";" + VarName[ivar] + ";Frequency", Nbins[ivar], xmin[ivar], xmax[ivar]) for ivar in xrange(Nvar)]
    hist_all = [TH1D("hist_all_" + VarName[ivar], ";" + VarName[ivar] + ";Frequency", Nbins[ivar], xmin[ivar], xmax[ivar]) for ivar in xrange(Nvar)]

    # 2D histograms:
    hist2D_sig = TH2D("hist2D_sig", ";Mass;Shape", Nbins[0], xmin[0], xmax[0], Nbins[1], xmin[1], xmax[1])
    hist2D_bkg = TH2D("hist2D_bkg", ";Mass;Shape", Nbins[0], xmin[0], xmax[0], Nbins[1], xmin[1], xmax[1])
    hist2D_all = TH2D("hist2D_all", ";Mass;Shape", Nbins[0], xmin[0], xmax[0], Nbins[1], xmin[1], xmax[1])

    # Calculating binwidth to ensure that PDFs are normalised correctly:
    binwidth0 = (xmax[0] - xmin[0]) / Nbins[0]
    binwidth1 = (xmax[1] - xmin[1]) / Nbins[1]
    binarea = binwidth0 * binwidth1


    # ------------------------------------------------------------------------------
    # Produce the desired number of points with 3 variables for signal and background:
    #  * Mass:  Gaussian peak vs. exponential.
    #  * Shape: Two Gaussian peaks.
    #  * Angle: Beta distribution.
    # ------------------------------------------------------------------------------

    # Produce mass, shape and angle values for signal:
    for isig in xrange ( Nsig ) :

        x_sig.append( [ r.Gaus(  0.8, 0.2 ), r.Gaus(  0.0, 0.4 ), GetAngle( 0.8, 0.2 ) ] )
        x_all.append( [ r.Gaus(  0.8, 0.2 ), r.Gaus(  0.0, 0.4 ), GetAngle( 0.8, 0.2 ) ] )
        # x_sig.append( [ r.Gaus(  0.8, 0.2 ), r.Gaus(  0.0, 0.4 ), GetAngle2( 0.9, 12.0, 1.0 ) ] )
        # x_all.append( [ r.Gaus(  0.8, 0.2 ), r.Gaus(  0.0, 0.4 ), GetAngle2( 0.9, 12.0, 1.0 ) ] )
        for i in xrange( Nvar ) :
            hist_sig[i].Fill(x_sig[-1][i])
            hist_all[i].Fill(x_sig[-1][i])
        hist2D_sig.Fill(x_sig[-1][0], x_sig[-1][1])
        hist2D_all.Fill(x_sig[-1][0], x_sig[-1][1])

        # Write to output file (in ASCII):
        for i in xrange( Nvar ) : outfile.write( " %8.4f"%(x_sig[-1][i]) ),
        outfile.write("\n")
        if (verbose and isig < Nverbose) :
            print "  Signal:    ",
            for i in xrange( Nvar ) : print "  var%1d: %6.3f"%(isig, x_sig[-1][i]),
            print

        
    # Produce mass, shape and angle values for signal:
    for ibkg in xrange ( Nbkg ) :

        x_bkg.append( [ r.Exp( 0.5 ), r.Gaus( -1.0, 0.6 ), GetAngle( -0.8, 0.2 ) ] )
        x_all.append( [ r.Exp( 0.5 ), r.Gaus( -1.0, 0.6 ), GetAngle( -0.8, 0.2 ) ] )
        # x_bkg.append( [ r.Exp( 0.5 ), r.Gaus( -1.0, 0.6 ), GetAngle2( 0.8, 17.0, 0.5 ) ] )
        # x_all.append( [ r.Exp( 0.5 ), r.Gaus( -1.0, 0.6 ), GetAngle2( 0.8, 17.0, 0.5 ) ] )
        for i in xrange( Nvar ) :
            hist_bkg[i].Fill(x_bkg[-1][i])
            hist_all[i].Fill(x_bkg[-1][i])
        hist2D_bkg.Fill(x_bkg[-1][0], x_bkg[-1][1])
        hist2D_all.Fill(x_bkg[-1][0], x_bkg[-1][1])

        # Write to output file (in ASCII):
        for i in xrange( Nvar ) : outfile.write( " %8.4f"%(x_bkg[-1][i]) ),
        outfile.write("\n")
        if (verbose and ibkg < Nverbose) :
            print "  Background:",
            for i in xrange( Nvar ) : print "  var%1d: %6.3f"%(ibkg, x_bkg[-1][i]),
            print
 
    outfile.close()


    # Fit the signal and background samples in 1D:
    # --------------------------------------------
    # Fit mass dimension:
    def func_GaussExp(x, p) :
        z = (x[0] - p[1]) / p[2]
        sig = p[0]*binwidth0 / sqrt(2.0*pi)/p[2] * exp(-0.5*z*z)
        bkg = p[3]*binwidth0 * p[4] * exp(-p[4]*x[0])
        return sig+bkg

    fit_mass = TF1("fit_mass", func_GaussExp, 0.0, 2.0, 5)
    fit_mass.SetParameters(Nsig, 1.0, 0.2, Nbkg, 0.5)
    fit_mass.SetLineColor(kBlack)
    hist_all[0].Fit("fit_mass", "LR")

    # Fit shape dimension:
    def func_GaussGauss(x, p) :
        zsig = (x[0] - p[1]) / p[2]
        sig = p[0]*binwidth1 / sqrt(2.0*pi)/p[2] * exp(-0.5*zsig*zsig)
        zbkg = (x[0] - p[4]) / p[5]
        bkg = p[3]*binwidth1 / sqrt(2.0*pi)/p[5] * exp(-0.5*zbkg*zbkg)
        return sig+bkg

    fit_shape = TF1("fit_shape", func_GaussGauss, -3.0, 2.0, 6)
    fit_shape.SetParameters(Nsig, 0.0, 0.4, Nbkg, -1.0, 0.6)
    fit_shape.SetLineColor(kBlack)
    hist_all[1].Fit("fit_shape", "LR")


    # Fit the signal and background samples in 2D (mass and shape):
    # ------------------------------------------------------------------------
    def func_GEGG(x, p) :
        z0sig = (x[0] - p[1]) / p[2]
        z1sig = (x[1] - p[3]) / p[4]
        sig = p[0]*binarea / sqrt(2.0*pi)/p[2] * exp(-0.5*z0sig*z0sig) / sqrt(2.0*pi)/p[4] * exp(-0.5*z1sig*z1sig)
        z1bkg = (x[1] - p[7]) / p[8]
        bkg = p[5]*binarea * p[6] * exp(-p[6]*x[0]) / sqrt(2.0*pi)/p[8] * exp(-0.5*z1bkg*z1bkg)
        return sig+bkg
 
    fit_massshape = TF2("fit_massshape", func_GEGG, 0.0, 2.0, -3.0, 2.0, 9)
    # fit_massshape.SetParameters(Nsig, 1.0, 0.2, 0.0, 0.4, Nbkg, 0.5, -1.0, 0.6)
    fit_massshape.SetParameters(fit_mass.GetParameter(0), fit_mass.GetParameter(1), fit_mass.GetParameter(2),
                                fit_shape.GetParameter(1), fit_mass.GetParameter(2), fit_mass.GetParameter(0),
                                fit_mass.GetParameter(1), fit_shape.GetParameter(1), fit_shape.GetParameter(2))
    if (RunFast) :     # For debugging it is worthwhile skipping a 9 parameter fit in 2D!!!
        fit_massshape.FixParameter(1, 1.0)
        fit_massshape.FixParameter(2, 0.2)
        fit_massshape.FixParameter(3, 0.0)
        fit_massshape.FixParameter(4, 0.4)
        fit_massshape.FixParameter(6, 0.5)
        fit_massshape.FixParameter(7,-1.0)
        fit_massshape.FixParameter(8, 0.6)
    fit_massshape.SetLineColor(kBlack)
    hist2D_all.Fit("fit_massshape", "LR")

    # To get the covariance matrix between signal and background normalisations,
    # we fix the shape variables and refit the data:
    fit_massshape2 = TF2("fit_massshape2", func_GEGG, 0.0, 2.0, -3.0, 2.0, 9)
    # fit_massshape2.SetParameters(Nsig, 1.0, 0.2, 0.0, 0.4, Nbkg, 0.5, -1.0, 0.6)
    fit_massshape2.SetParameters(fit_mass.GetParameter(0), fit_mass.GetParameter(1), fit_mass.GetParameter(2),
                                 fit_shape.GetParameter(1), fit_mass.GetParameter(2), fit_mass.GetParameter(0),
                                 fit_mass.GetParameter(1), fit_shape.GetParameter(1), fit_shape.GetParameter(2))
    for ivar in xrange (9) :
        if (ivar != 0 and ivar != 5) :
            fit_massshape2.FixParameter(ivar, fit_massshape.GetParameter(ivar))
    fit_massshape2.SetLineColor(kBlack)
    fitres = hist2D_all.Fit("fit_massshape2", "LRS")      # Option "S" gives fit result as output!
    cmfit = fitres.GetCovarianceMatrix()                  # Only non-zero entries for variable 0 and 5
    covmatfit = matrix( [ [cmfit[0][0], cmfit[0][5]], [cmfit[5][0], cmfit[5][5] ] ] )

    
    # Produce plots with the variable distributions for signal and background:
    # ------------------------------------------------------------------------
    canvas0 = TCanvas("canvas0", "", 20, 20, 1200, 700)

    for i in range ( Nvar ) :
        hist_all[i].SetXTitle(VarName[i])
        hist_all[i].SetYTitle("Frequency")
        hist_all[i].SetMinimum(0.0)
        hist_all[i].SetLineColor(kMagenta)
        hist_all[i].SetLineWidth(2)
        hist_all[i].Draw()
        hist_sig[i].SetLineColor(kRed)
        hist_sig[i].SetLineWidth(2)
        hist_sig[i].Draw("same")
        hist_bkg[i].SetLineColor(kBlue)
        hist_bkg[i].SetLineWidth(2)
        hist_bkg[i].Draw("same")

        canvas0.Update()
        if (SavePlots) : canvas0.SaveAs("DistAll_" + VarName[i] + ".pdf")

    # Get a separate plot of the 2D fit:
    hist2D_all.SetMarkerColor(kMagenta)
    hist2D_all.Draw("box")

    canvas0.Update()
    if (SavePlots) : canvas0.SaveAs("Dist2D_All_fit.pdf")


    # Produce plots with the variable distributions for signal and background:
    # ------------------------------------------------------------------------
    canvas1 = TCanvas("canvas1", "", 20, 20, 1200, 700)
    canvas1.Divide(2,2)

    for i in xrange( Nvar ) : 
        canvas1.cd(i+1)
        hist_all[i].SetXTitle(VarName[i])
        hist_all[i].SetYTitle("Frequency")
        hist_all[i].SetMinimum(0.0)
        hist_all[i].SetLineColor(kMagenta)
        hist_all[i].SetLineWidth(2)
        hist_all[i].Draw()
        hist_sig[i].SetLineColor(kRed)
        hist_sig[i].SetLineWidth(2)
        hist_sig[i].Draw("same")
        hist_bkg[i].SetLineColor(kBlue)
        hist_bkg[i].SetLineWidth(2)
        hist_bkg[i].Draw("same")

    canvas1.cd(4)
    hist2D_all.SetMarkerColor(kMagenta)
    hist2D_all.Draw("box")

    canvas1.Update()
    if (SavePlots) : canvas1.SaveAs("VariableDistributions.pdf")


    # -----------------------------------------------------------------------------------
    # Given succesful fit, calculate sWeights the produce signal distribution of "angle":
    # -----------------------------------------------------------------------------------

    par = []     # Abbreviation of an otherwise very long expression!
    for ipar in xrange ( 9 ) : par.append(fit_massshape.GetParameter(ipar))

    def func_GEGG_signal(x) :
        z0sig = (x[0] - par[1]) / par[2]
        z1sig = (x[1] - par[3]) / par[4]
        return par[0] / sqrt(2.0*pi)/par[2] * exp(-0.5*z0sig*z0sig) / sqrt(2.0*pi)/par[4] * exp(-0.5*z1sig*z1sig)

    def func_GEGG_background(x) :
        z1bkg = (x[1] - par[7]) / par[8]
        return par[5] * par[6] * exp(-par[6]*x[0]) / sqrt(2.0*pi)/par[8] * exp(-0.5*z1bkg*z1bkg)

    # Covariance matrix (inverse to be inverted), calculated from sum over all events:
    iclist = [ [0.0, 0.0], [0.0, 0.0] ]
    for i in xrange ( len(x_all) ) :
        sigPDF = func_GEGG_signal( [x_all[i][0], x_all[i][1] ] ) / par[0]
        bkgPDF = func_GEGG_background( [x_all[i][0], x_all[i][1] ] ) / par[5]
        denominator = sqr( par[0]*sigPDF + par[5]*bkgPDF )
        iclist[0][0] += sigPDF*sigPDF / denominator
        iclist[0][1] += sigPDF*bkgPDF / denominator
        iclist[1][0] += bkgPDF*sigPDF / denominator
        iclist[1][1] += bkgPDF*bkgPDF / denominator
    invcovmat = matrix( [ [iclist[0][0], iclist[0][1]], [iclist[1][0], iclist[1][1]] ] )
    # print "Inverse covarance matrix from direct calculation:  "
    # print invcovmat

    covmat = invcovmat.I
    print "Covarance matrix as obtained from direct calculation: "
    print covmat

    print "Covarance matrix as obtained from fit: "
    print covmatfit

    print "Number of signal events:      %8.1f  (NsigTrue = %5d)"%(par[0], Nsig)
    print "Number of background events:  %8.1f  (NbkgTrue = %5d)"%(par[5], Nbkg)


    # Loop over all events and calculate their sWeight (and background weight = 1-sWeight):
    # Also, use the sWeight in producing plot of signal (and background) angle!
    hist_sWeight_sig = TH1D("hist_sWeight_sig", ";sWeight (for signal events);Frequency", 175, -1.0, 2.5)
    hist_sWeight_bkg = TH1D("hist_sWeight_bkg", ";bWeight (for background events);Frequency", 175, -1.0, 2.5)
    hist_Angle_sig = TH1D("hist_Angle_sig", ";Signal Angle;Frequency", Nbins[2], xmin[2], xmax[2])
    hist_Angle_bkg = TH1D("hist_Angle_bkg", ";Background Angle;Frequency", Nbins[2], xmin[2], xmax[2])

    for i in xrange ( len(x_all) ) :
        sigPDF = func_GEGG_signal( [x_all[i][0], x_all[i][1] ] ) / par[0]
        bkgPDF = func_GEGG_background( [x_all[i][0], x_all[i][1] ] ) / par[5]
        sWeight = (covmat.item(0,0) * sigPDF + covmat.item(0,1) * bkgPDF) / (par[0]*sigPDF + par[5]*bkgPDF)
        bWeight = (covmat.item(1,0) * sigPDF + covmat.item(1,1) * bkgPDF) / (par[0]*sigPDF + par[5]*bkgPDF)
        
        # Plot weights for signal and background events:
        if (i < Nsig) : hist_sWeight_sig.Fill(sWeight)
        else :          hist_sWeight_bkg.Fill(bWeight)
        hist_Angle_sig.Fill(x_all[i][2], sWeight)
        hist_Angle_bkg.Fill(x_all[i][2], bWeight)

        if ((verbose and i < Nverbose) or (verbose and i >= Nsig and i < Nsig+Nverbose)) :
            print "  %4d:   m=%6.3f  s=%6.3f   sigPDF=%6.3f  bkgPDF=%6.3f   s/bWeight = %6.3f %6.3f"%(i,x_all[i][0],x_all[i][1],sigPDF,bkgPDF,sWeight,bWeight)


    # Produce plots with the variable distributions for signal and background:
    # ------------------------------------------------------------------------
    canvas2 = TCanvas("canvas2", "", 50, 50, 1200, 700)

    hist_sWeight_sig.SetLineColor(kRed)
    hist_sWeight_sig.SetLineWidth(2)
    hist_sWeight_sig.Draw()
    canvas2.Update()
    if (SavePlots) : canvas2.SaveAs("Angle_sWeight.pdf")

    hist_sWeight_bkg.SetLineColor(kBlue)
    hist_sWeight_bkg.SetLineWidth(2)
    hist_sWeight_bkg.Draw()
    canvas2.Update()
    if (SavePlots) : canvas2.SaveAs("Angle_bWeight.pdf")

    hist_Angle_sig.SetLineColor(kRed)
    hist_Angle_sig.SetLineWidth(2)
    hist_Angle_sig.Draw("e")
    hist_sig[2].SetLineColor(kMagenta)
    hist_sig[2].SetLineWidth(2)
    hist_sig[2].Draw("same")
    p_chi2 = hist_sig[2].Chi2Test(hist_Angle_sig, "UW P");
    p_kolm = hist_sig[2].KolmogorovTest(hist_Angle_sig, "N");
    print "  Chi2 and Kolmogorov probability between sPlot and Truth of signal: %6.4f %6.4f"%(p_chi2, p_kolm)
    canvas2.Update()
    if (SavePlots) : canvas2.SaveAs("Angle_signal_sWeights.pdf")

    hist_Angle_bkg.SetLineColor(kBlue)
    hist_Angle_bkg.SetLineWidth(2)
    hist_Angle_bkg.Draw("e")
    hist_bkg[2].SetLineColor(kMagenta)
    hist_bkg[2].SetLineWidth(2)
    hist_bkg[2].Draw("same")
    p_chi2 = hist_bkg[2].Chi2Test(hist_Angle_bkg, "UW P");
    p_kolm = hist_bkg[2].KolmogorovTest(hist_Angle_bkg, "N");
    print "  Chi2 and Kolmogorov probability between sPlot and Truth of background: %6.4f %6.4f"%(p_chi2, p_kolm)
    canvas2.Update()
    if (SavePlots) : canvas2.SaveAs("Angle_background_bWeights.pdf")


    # Produce one plot with everything in (which gives a good overview):
    canvas2.Divide(2,2)

    canvas2.cd(1)
    hist_sWeight_sig.Draw()

    canvas2.cd(2)
    hist_sWeight_bkg.Draw()

    canvas2.cd(3)
    hist_Angle_sig.Draw("e")
    hist_sig[2].Draw("same")

    canvas2.cd(4)
    hist_Angle_bkg.Draw("e")
    hist_bkg[2].Draw("same")

    canvas2.Update()
    if (SavePlots) : canvas2.SaveAs("Angle_PlotOfEverything.pdf")


    raw_input( ' ... ' )
    return 


# ------------------------------------------------------------------------ #
if __name__ == '__main__':
    sys.exit( main() )