#!/usr/bin/env python

# ----------------------------------------------------------------------------------- #
#  Python script for producing illustration of ROC curve with Gaussian examples.
#   
#  Author: Troels C. Petersen (NBI)
#  Date:   3rd of December 2017
#
#  Run by: ./MakeROCfigure.py
#
# ----------------------------------------------------------------------------------- #

# First, import the modules you want to use:
from __future__ import print_function, division        #
import numpy as np                                     # Matlab like syntax for linear algebra and functions
import matplotlib.pyplot as plt                        # Plots and figures like you know them from Matlab
plt.close('all')         # to close all open figures

r = np.random             # Random generator
r.seed(42)                # Set a random seed (but a fixed one)

SavePlots = True          # For now, don't save plots (once you trust your code, switch on)
verbose = True            # For now, print a lot of output (once you trust your code, switch off)
Nverbose = 10             # But only print a lot for the first 10 random numbers
veryverbose = False


# ----------------------------------------------------------------------------------- #
# Functions:
# ----------------------------------------------------------------------------------- #

## Calculate ROC curve from two histograms:
def calc_ROC(hist1, hist2):
    
    y_sig, x_edges, _ = hist1 
    y_bkg, x2_edges, _ = hist2
    
    # Check that the two histograms have the same number of bins and same range:
    if (x_edges == x2_edges).any():
        
        x_centers = 0.5*(x_edges[1:] + x_edges[:-1])
        
        N = len(y_sig)
        
        integral_sig = y_sig.sum()
        integral_bkg = y_bkg.sum()
    
        FPR = np.zeros(N) # False positive rate ()
        TPR = np.zeros(N) # True positive rate (sensitivity)
        
        # https://upload.wikimedia.org/wikipedia/commons/4/4f/ROC_curves.svg
        for i, x in enumerate(x_centers): 
            cut = (x_centers < x)
            TN = np.sum(y_bkg[cut]) / integral_bkg      # True negatives (background)
            FP = np.sum(y_bkg[~cut]) / integral_bkg     # False positives (signal)
                
            FN = np.sum(y_sig[cut]) / integral_sig     # False negatives
            TP = np.sum(y_sig[~cut]) / integral_sig    # True positives
            
            FPR[i] = FP / ( FP + TN )
            TPR[i] = TP / ( TP + FN )
        
        return FPR, TPR
    
    else:
        print("ERROR: Signal and Background histograms have different bins and ranges")


# ----------------------------------------------------------------------------------- #
# Produce histograms with Gaussian distributions (signal & background) of varying separation:
# ----------------------------------------------------------------------------------- #

Nsigma = np.array([0.0, 0.5, 1.0, 1.5, 2.0, 3.0])      # We choose six signal-background separations

# Based on the above, define the means and widths of the signal and background Gaussians:
# I choose the widths to be unit, and the means to move symmetrically around 5.0:
# Nsigma = (mean_sig - mean_bkg) / sqrt(width_sig^2 + width_bkg^2) = dmean / sqrt(2)

mean_sig = 5 + Nsigma / np.sqrt(2.0)
mean_bkg = 5 - Nsigma / np.sqrt(2.0)

Nentries  = 10000

random_sig = r.normal(loc=mean_sig, scale=1, size=(Nentries, len(Nsigma)))
random_bkg = r.normal(loc=mean_bkg, scale=1, size=(Nentries, len(Nsigma)))


from matplotlib.gridspec import GridSpec
fig = plt.figure(figsize=(16,6))

gs = GridSpec(2,5) # 2 rows, 3 columns
ax = [fig.add_subplot(gs[0,0]), # First row,   first column
      fig.add_subplot(gs[1,0]), # Second row,  first column
      fig.add_subplot(gs[0,1]), # First row,   second column
      fig.add_subplot(gs[1,1]), # Second row,  second column
      fig.add_subplot(gs[0,2]), # First row,  third column
      fig.add_subplot(gs[1,2])] # Second row,  third column
ax2 = fig.add_subplot(gs[:,3:]) # Span both rows and columns from 3 and onwards


Nbins = 100
xmin, xmax = 0, 10

hist_sig = []
hist_bkg = []

y_sig = np.zeros((Nbins, len(Nsigma)))
y_bkg = np.zeros_like(y_sig)


for i in range(len(Nsigma)):
    hist_sig.append(ax[i].hist(random_sig[:, i], bins=Nbins, range=(xmin, xmax), histtype='step', label='Signal'))
    hist_bkg.append(ax[i].hist(random_bkg[:, i], bins=Nbins, range=(xmin, xmax), histtype='step', label='Background'))
    ax[i].set_xlabel('Separating variable')
    ax[i].set_ylabel('Frequency')
    ax[i].text(0.75, 0.9, r'{:1.1f}$\sigma$'.format(Nsigma[i]), size=16,
              verticalalignment='center', transform=ax[i].transAxes)
ax[i].legend(loc='upper left')
    

text_sigma_loc = [(0.5, 0.45), (0.36, 0.59), (0.23, 0.72), (0.15, 0.82), (0.1, 0.9), (0.02, 0.96)]
for i in range(len(Nsigma)):
    FPR, TPR = calc_ROC(hist_sig[i], hist_bkg[i])
#    rejection_bkg = 1 - eff_bkg
    text_sigma = r'{:1.1f}$\sigma$'.format(Nsigma[i])
    ax2.plot(FPR, TPR, '-', label=text_sigma)
    ax2.text(text_sigma_loc[i][0], text_sigma_loc[i][1], text_sigma, size=12,
              verticalalignment='center', transform=ax2.transAxes)

ax2.plot([0, 1], [0, 1], '--', label='Random Guess')
ax2.set_xlabel('False Positive Rate (Background Efficiency)')
ax2.set_ylabel('True Positive Rate (Signal Efficiency)')
ax2.set_xlim(0, 1)
ax2.set_ylim(0, 1)
ax2.legend()

fig.tight_layout()
plt.show(block=False)

if SavePlots:
    fig.savefig('ROCcurves_GaussianSeparations.pdf', dpi=600)


try:
    __IPYTHON__
except:
    raw_input('Press Enter to exit')


# ----------------------------------------------------------------------------------- #
# 
# Make sure that you understand what the ROC represents, and how it is calculated!
# 
# ----------------------------------------------------------------------------------- #