#!/usr/bin/env python

# ----------------------------------------------------------------------------------- #
#  Python script for producing illustration of ROC curve with Gaussian examples.
#   
#  Author: Troels C. Petersen (NBI)
#  Date:   3rd of December 2015
#
# ----------------------------------------------------------------------------------- #

#Load modules here
from array import array
from ROOT import *
from math import sqrt, sin, cos, tan, exp

#Set plotting style
gStyle.SetOptStat(0)
gStyle.SetOptFit(0)

# Random numbers from the Mersenne-Twister:
r = TRandom3()
r.SetSeed(1)

SavePlots = True
verbose = True
Nverbose = 10
veryverbose = False


# ----------------------------------------------------------------------------------- #
# Functions:
# ----------------------------------------------------------------------------------- #

# Calculate the sqr of a function:
def sqr( a ) : return a*a


# Calculate ROC curve from two histograms:
def CalcROC(histSig, histBkg) :

    # Loop over histogram bins and calculate vectors of efficiencies:
    # ---------------------------------------------------------------
    effSig = array( "f" )
    effBkg = array( "f" )

    # Check that the two histograms have the same number of bins and same range:
    if (histSig.GetNbinsX() == histBkg.GetNbinsX()) :
        if ( abs(histSig.GetXaxis().GetXmax() - histBkg.GetXaxis().GetXmax()) < 0.0001 and
             abs(histSig.GetXaxis().GetXmin() - histBkg.GetXaxis().GetXmin()) < 0.0001) :
            Nbins = histSig.GetNbinsX()

            # Get integral (including underflow and overflow):
            integralSig = 0.0
            integralBkg = 0.0
            for ibin in range(Nbins+2) :
                integralSig += histSig.GetBinContent(ibin)
                integralBkg += histBkg.GetBinContent(ibin)

            # Integrate each bin, and add result to ROC curve (contained in effSig and effBkg):
            effSig.append(0.0)
            effBkg.append(0.0)
            sumSig = 0.0
            sumBkg = 0.0
            for ibin in range (Nbins+1, 0, -1) :
                sumSig += histSig.GetBinContent(ibin)
                sumBkg += histBkg.GetBinContent(ibin)
                effSig.append(sumSig/integralSig)
                effBkg.append(sumBkg/integralBkg)
                if (veryverbose) :
                    print "  bin %3d:   effSig = %5.3f   effBkg = %5.3f"%(ibin, effSig[-1], effBkg[-1])
      
            # Make ROC curve in a TGraph of the two arrays, and return this:
            graphROC = TGraph(len(effSig), effSig, effBkg)
            return graphROC

        else :
            print "ERROR: Signal and Background histograms have different ranges!"
            return None
    
    else :
        print "ERROR: Signal and Background histograms have different binning!"
        return None

    return None


# ----------------------------------------------------------------------------------- #
# Produce histograms with Gaussian distributions (signal & background) of varying separation:
# ----------------------------------------------------------------------------------- #

Nsigma = [0.0, 0.5, 1.0, 1.5, 2.0, 3.0]      # We choose six signal-background separations

# Based on the above, define the means and widths of the signal and background Gaussians:
# I choose the widths to be unit, and the means to move symmetrically around 5.0:
# Nsigma = (mean_sig - mean_bkg) / sqrt(width_sig^2 + width_bkg^2) = dmean / sqrt(2)
mean_sig = []
mean_bkg = []
for isig in range ( len(Nsigma) ) :
    mean_sig.append(5.0 + Nsigma[isig] * sqrt(2.0) / 2.0)
    mean_bkg.append(5.0 - Nsigma[isig] * sqrt(2.0) / 2.0)

# Produce histograms accordingly:
Nentries  = 10000
hist_sig  = []
hist_bkg  = []
graph_ROC = []

for isig in range ( len(Nsigma) ) :
    name = "hist_sig_Nsigma" + str(isig)          # To give each histogram a different name
    hist_sig.append(TH1D(name, ";Separating variable;Frequency", 100, 0.0, 10.0))
    name = "hist_bkg_Nsigma" + str(isig)          # To give each histogram a different name
    hist_bkg.append(TH1D(name, ";Separating variable;Frequency", 100, 0.0, 10.0))

    for ientry in range ( Nentries ) :
        hist_sig[isig].Fill( r.Gaus(mean_sig[isig]), 1.0 )
        hist_bkg[isig].Fill( r.Gaus(mean_bkg[isig]), 1.0 )

    # Make ROC curve from signal and background histograms:
    graph_ROC.append(CalcROC(hist_sig[isig], hist_bkg[isig]))


# ----------------------------------------------------------------------------------- #
# Make plot showing histograms inside ROC curve plot:
# ----------------------------------------------------------------------------------- #

canvas2 = TCanvas("canvas2", "ROC curve example", 20, 20, 900, 800)
canvas2.DrawFrame(0.0, 0.0, 1.0, 1.0)          # I define a frame inside the canvas

pads = []
xpads = [0.13, 0.31, 0.49, 0.13, 0.31, 0.13]
ypads = [0.68, 0.68, 0.68, 0.50, 0.50, 0.32]
xsize = 0.22
ysize = 0.20

for isig in range ( len(Nsigma) ) :

    # Draw ROC curves:
    canvas2.cd()
    graph_ROC[isig].SetLineColor(kMagenta)
    graph_ROC[isig].SetLineWidth(2)
    graph_ROC[isig].Draw("same")

    # Draw corresponding histogram in its own pad:
    name = "pad_Nsigma" + str(isig)          # To give each pad a different name
    pads.append(TPad(name, "", xpads[isig], ypads[isig], xpads[isig]+xsize, ypads[isig]+ysize, 0, 0, 0))
    pads[-1].SetTopMargin(0.0);
    pads[-1].SetLineColor(0);
    pads[-1].SetFillColor(0);
    pads[-1].GetFrame().SetFillColor(0);
    pads[-1].GetFrame().SetBorderSize(0);
    pads[-1].GetFrame().SetLineColor(kWhite);
    pads[-1].Draw();
    pads[-1].cd();

    hist_sig[isig].SetLineColor(kRed)
    hist_sig[isig].SetLineWidth(3)
    hist_sig[isig].Draw("")
    hist_bkg[isig].SetLineColor(kBlue)
    hist_bkg[isig].SetLineWidth(3)
    hist_bkg[isig].Draw("same")


"""
legend2 = TLegend( 0.15, 0.70, 0.38, 0.85 )
legend2.SetFillColor(kWhite)
legend2.SetLineColor(kWhite)
legend2.AddEntry(graph_ROC[isig],   " Fraction HT - All hits", "LP")
legend2.Draw()
"""

canvas2.Update()
if (SavePlots) : canvas2.SaveAs("ROCcurves_GaussianSeparations.pdf")

raw_input( ' ... ' )