#!/usr/bin/env python # ----------------------------------------------------------------------------------- # # Python script for producing illustration of ROC curve with Gaussian examples. # # Author: Troels C. Petersen (NBI) # Date: 3rd of December 2017 # # Run by: ./MakeROCfigure.py # # ----------------------------------------------------------------------------------- # # First, import the modules you want to use: from __future__ import print_function, division # import numpy as np # Matlab like syntax for linear algebra and functions import matplotlib.pyplot as plt # Plots and figures like you know them from Matlab plt.close('all') # to close all open figures r = np.random # Random generator r.seed(42) # Set a random seed (but a fixed one) SavePlots = True # For now, don't save plots (once you trust your code, switch on) verbose = True # For now, print a lot of output (once you trust your code, switch off) Nverbose = 10 # But only print a lot for the first 10 random numbers veryverbose = False # ----------------------------------------------------------------------------------- # # Functions: # ----------------------------------------------------------------------------------- # ## Calculate ROC curve from two histograms: def calc_ROC(hist1, hist2): y_sig, x_edges, _ = hist1 y_bkg, x2_edges, _ = hist2 # Check that the two histograms have the same number of bins and same range: if (x_edges == x2_edges).any(): x_centers = 0.5*(x_edges[1:] + x_edges[:-1]) N = len(y_sig) integral_sig = y_sig.sum() integral_bkg = y_bkg.sum() FPR = np.zeros(N) # False positive rate () TPR = np.zeros(N) # True positive rate (sensitivity) # https://upload.wikimedia.org/wikipedia/commons/4/4f/ROC_curves.svg for i, x in enumerate(x_centers): cut = (x_centers < x) TN = np.sum(y_bkg[cut]) / integral_bkg # True negatives (background) FP = np.sum(y_bkg[~cut]) / integral_bkg # False positives (signal) FN = np.sum(y_sig[cut]) / integral_sig # False negatives TP = np.sum(y_sig[~cut]) / integral_sig # True positives FPR[i] = FP / ( FP + TN ) TPR[i] = TP / ( TP + FN ) return FPR, TPR else: print("ERROR: Signal and Background histograms have different bins and ranges") # ----------------------------------------------------------------------------------- # # Produce histograms with Gaussian distributions (signal & background) of varying separation: # ----------------------------------------------------------------------------------- # Nsigma = np.array([0.0, 0.5, 1.0, 1.5, 2.0, 3.0]) # We choose six signal-background separations # Based on the above, define the means and widths of the signal and background Gaussians: # I choose the widths to be unit, and the means to move symmetrically around 5.0: # Nsigma = (mean_sig - mean_bkg) / sqrt(width_sig^2 + width_bkg^2) = dmean / sqrt(2) mean_sig = 5 + Nsigma / np.sqrt(2.0) mean_bkg = 5 - Nsigma / np.sqrt(2.0) Nentries = 10000 random_sig = r.normal(loc=mean_sig, scale=1, size=(Nentries, len(Nsigma))) random_bkg = r.normal(loc=mean_bkg, scale=1, size=(Nentries, len(Nsigma))) from matplotlib.gridspec import GridSpec fig = plt.figure(figsize=(16,6)) gs = GridSpec(2,5) # 2 rows, 3 columns ax = [fig.add_subplot(gs[0,0]), # First row, first column fig.add_subplot(gs[1,0]), # Second row, first column fig.add_subplot(gs[0,1]), # First row, second column fig.add_subplot(gs[1,1]), # Second row, second column fig.add_subplot(gs[0,2]), # First row, third column fig.add_subplot(gs[1,2])] # Second row, third column ax2 = fig.add_subplot(gs[:,3:]) # Span both rows and columns from 3 and onwards Nbins = 100 xmin, xmax = 0, 10 hist_sig = [] hist_bkg = [] y_sig = np.zeros((Nbins, len(Nsigma))) y_bkg = np.zeros_like(y_sig) for i in range(len(Nsigma)): hist_sig.append(ax[i].hist(random_sig[:, i], bins=Nbins, range=(xmin, xmax), histtype='step', label='Signal')) hist_bkg.append(ax[i].hist(random_bkg[:, i], bins=Nbins, range=(xmin, xmax), histtype='step', label='Background')) ax[i].set_xlabel('Separating variable') ax[i].set_ylabel('Frequency') ax[i].text(0.75, 0.9, r'{:1.1f}$\sigma$'.format(Nsigma[i]), size=16, verticalalignment='center', transform=ax[i].transAxes) ax[i].legend(loc='upper left') text_sigma_loc = [(0.5, 0.45), (0.36, 0.59), (0.23, 0.72), (0.15, 0.82), (0.1, 0.9), (0.02, 0.96)] for i in range(len(Nsigma)): FPR, TPR = calc_ROC(hist_sig[i], hist_bkg[i]) # rejection_bkg = 1 - eff_bkg text_sigma = r'{:1.1f}$\sigma$'.format(Nsigma[i]) ax2.plot(FPR, TPR, '-', label=text_sigma) ax2.text(text_sigma_loc[i][0], text_sigma_loc[i][1], text_sigma, size=12, verticalalignment='center', transform=ax2.transAxes) ax2.plot([0, 1], [0, 1], '--', label='Random Guess') ax2.set_xlabel('False Positive Rate (Background Efficiency)') ax2.set_ylabel('True Positive Rate (Signal Efficiency)') ax2.set_xlim(0, 1) ax2.set_ylim(0, 1) ax2.legend() fig.tight_layout() plt.show(block=False) if SavePlots: fig.savefig('ROCcurves_GaussianSeparations.pdf', dpi=600) try: __IPYTHON__ except: raw_input('Press Enter to exit') # ----------------------------------------------------------------------------------- # # # Make sure that you understand what the ROC represents, and how it is calculated! # # ----------------------------------------------------------------------------------- #