#!/usr/bin/env python # ----------------------------------------------------------------------------------- # # Python script for producing illustration of ROC curve with Gaussian examples. # # Author: Troels C. Petersen (NBI) # Date: 3rd of December 2015 # # ----------------------------------------------------------------------------------- # #Load modules here from array import array from ROOT import * from math import sqrt, sin, cos, tan, exp #Set plotting style gStyle.SetOptStat(0) gStyle.SetOptFit(0) # Random numbers from the Mersenne-Twister: r = TRandom3() r.SetSeed(1) SavePlots = True verbose = True Nverbose = 10 veryverbose = False # ----------------------------------------------------------------------------------- # # Functions: # ----------------------------------------------------------------------------------- # # Calculate the sqr of a function: def sqr( a ) : return a*a # Calculate ROC curve from two histograms: def CalcROC(histSig, histBkg) : # Loop over histogram bins and calculate vectors of efficiencies: # --------------------------------------------------------------- effSig = array( "f" ) effBkg = array( "f" ) # Check that the two histograms have the same number of bins and same range: if (histSig.GetNbinsX() == histBkg.GetNbinsX()) : if ( abs(histSig.GetXaxis().GetXmax() - histBkg.GetXaxis().GetXmax()) < 0.0001 and abs(histSig.GetXaxis().GetXmin() - histBkg.GetXaxis().GetXmin()) < 0.0001) : Nbins = histSig.GetNbinsX() # Get integral (including underflow and overflow): integralSig = 0.0 integralBkg = 0.0 for ibin in range(Nbins+2) : integralSig += histSig.GetBinContent(ibin) integralBkg += histBkg.GetBinContent(ibin) # Integrate each bin, and add result to ROC curve (contained in effSig and effBkg): effSig.append(0.0) effBkg.append(0.0) sumSig = 0.0 sumBkg = 0.0 for ibin in range (Nbins+1, 0, -1) : sumSig += histSig.GetBinContent(ibin) sumBkg += histBkg.GetBinContent(ibin) effSig.append(sumSig/integralSig) effBkg.append(sumBkg/integralBkg) if (veryverbose) : print " bin %3d: effSig = %5.3f effBkg = %5.3f"%(ibin, effSig[-1], effBkg[-1]) # Make ROC curve in a TGraph of the two arrays, and return this: graphROC = TGraph(len(effSig), effSig, effBkg) return graphROC else : print "ERROR: Signal and Background histograms have different ranges!" return None else : print "ERROR: Signal and Background histograms have different binning!" return None return None # ----------------------------------------------------------------------------------- # # Produce histograms with Gaussian distributions (signal & background) of varying separation: # ----------------------------------------------------------------------------------- # Nsigma = [0.0, 0.5, 1.0, 1.5, 2.0, 3.0] # We choose six signal-background separations # Based on the above, define the means and widths of the signal and background Gaussians: # I choose the widths to be unit, and the means to move symmetrically around 5.0: # Nsigma = (mean_sig - mean_bkg) / sqrt(width_sig^2 + width_bkg^2) = dmean / sqrt(2) mean_sig = [] mean_bkg = [] for isig in range ( len(Nsigma) ) : mean_sig.append(5.0 + Nsigma[isig] * sqrt(2.0) / 2.0) mean_bkg.append(5.0 - Nsigma[isig] * sqrt(2.0) / 2.0) # Produce histograms accordingly: Nentries = 10000 hist_sig = [] hist_bkg = [] graph_ROC = [] for isig in range ( len(Nsigma) ) : name = "hist_sig_Nsigma" + str(isig) # To give each histogram a different name hist_sig.append(TH1D(name, ";Separating variable;Frequency", 100, 0.0, 10.0)) name = "hist_bkg_Nsigma" + str(isig) # To give each histogram a different name hist_bkg.append(TH1D(name, ";Separating variable;Frequency", 100, 0.0, 10.0)) for ientry in range ( Nentries ) : hist_sig[isig].Fill( r.Gaus(mean_sig[isig]), 1.0 ) hist_bkg[isig].Fill( r.Gaus(mean_bkg[isig]), 1.0 ) # Make ROC curve from signal and background histograms: graph_ROC.append(CalcROC(hist_sig[isig], hist_bkg[isig])) # ----------------------------------------------------------------------------------- # # Make plot showing histograms inside ROC curve plot: # ----------------------------------------------------------------------------------- # canvas2 = TCanvas("canvas2", "ROC curve example", 20, 20, 900, 800) canvas2.DrawFrame(0.0, 0.0, 1.0, 1.0) # I define a frame inside the canvas pads = [] xpads = [0.13, 0.31, 0.49, 0.13, 0.31, 0.13] ypads = [0.68, 0.68, 0.68, 0.50, 0.50, 0.32] xsize = 0.22 ysize = 0.20 for isig in range ( len(Nsigma) ) : # Draw ROC curves: canvas2.cd() graph_ROC[isig].SetLineColor(kMagenta) graph_ROC[isig].SetLineWidth(2) graph_ROC[isig].Draw("same") # Draw corresponding histogram in its own pad: name = "pad_Nsigma" + str(isig) # To give each pad a different name pads.append(TPad(name, "", xpads[isig], ypads[isig], xpads[isig]+xsize, ypads[isig]+ysize, 0, 0, 0)) pads[-1].SetTopMargin(0.0); pads[-1].SetLineColor(0); pads[-1].SetFillColor(0); pads[-1].GetFrame().SetFillColor(0); pads[-1].GetFrame().SetBorderSize(0); pads[-1].GetFrame().SetLineColor(kWhite); pads[-1].Draw(); pads[-1].cd(); hist_sig[isig].SetLineColor(kRed) hist_sig[isig].SetLineWidth(3) hist_sig[isig].Draw("") hist_bkg[isig].SetLineColor(kBlue) hist_bkg[isig].SetLineWidth(3) hist_bkg[isig].Draw("same") """ legend2 = TLegend( 0.15, 0.70, 0.38, 0.85 ) legend2.SetFillColor(kWhite) legend2.SetLineColor(kWhite) legend2.AddEntry(graph_ROC[isig], " Fraction HT - All hits", "LP") legend2.Draw() """ canvas2.Update() if (SavePlots) : canvas2.SaveAs("ROCcurves_GaussianSeparations.pdf") raw_input( ' ... ' )