#!/usr/bin/env python # ----------------------------------------------------------------------------------- # # Python script for producing illustration of ROC curve with Gaussian examples. # # Author: Troels C. Petersen (NBI) # Date: 3rd of December 2016 # # Run by: ./MakeROCfigure.py # # ----------------------------------------------------------------------------------- # #Load modules here from array import array from ROOT import * from math import sqrt, sin, cos, tan, exp #Set plotting style gStyle.SetOptStat(0) gStyle.SetOptFit(0) # Used for text in plots: text = TLatex() # Define a text (to be put in plots) text.SetNDC() # Define all positions to be in a "unit canvas", i.e. [0,1]x[0,1] # Random numbers from the Mersenne-Twister: r = TRandom3() r.SetSeed(1) SavePlots = True verbose = True Nverbose = 10 veryverbose = False # ----------------------------------------------------------------------------------- # # Functions: # ----------------------------------------------------------------------------------- # # Calculate the sqr of a function: def sqr( a ) : return a*a # Calculate ROC curve from two histograms: def CalcROC(histSig, histBkg) : # Loop over histogram bins and calculate vectors of efficiencies: # --------------------------------------------------------------- effSig = array( "f" ) effBkg = array( "f" ) # Check that the two histograms have the same number of bins and same range: if (histSig.GetNbinsX() == histBkg.GetNbinsX()) : if ( abs(histSig.GetXaxis().GetXmax() - histBkg.GetXaxis().GetXmax()) < 0.0001 and abs(histSig.GetXaxis().GetXmin() - histBkg.GetXaxis().GetXmin()) < 0.0001) : Nbins = histSig.GetNbinsX() # Get integral (including underflow and overflow): integralSig = 0.0 integralBkg = 0.0 for ibin in range(Nbins+2) : integralSig += histSig.GetBinContent(ibin) integralBkg += histBkg.GetBinContent(ibin) # Integrate each bin, and add result to ROC curve (contained in effSig and effBkg): effSig.append(0.0) effBkg.append(0.0) sumSig = 0.0 sumBkg = 0.0 for ibin in range (Nbins+1, 0, -1) : sumSig += histSig.GetBinContent(ibin) sumBkg += histBkg.GetBinContent(ibin) effSig.append(sumSig/integralSig) effBkg.append(sumBkg/integralBkg) if (veryverbose) : print " bin %3d: effSig = %5.3f effBkg = %5.3f"%(ibin, effSig[-1], effBkg[-1]) # Make ROC curve in a TGraph of the two arrays, and return this: graphROC = TGraph(len(effSig), effSig, effBkg) return graphROC else : print "ERROR: Signal and Background histograms have different ranges!" return None else : print "ERROR: Signal and Background histograms have different binning!" return None return None # ----------------------------------------------------------------------------------- # # Produce histograms with Gaussian distributions (signal & background) of varying separation: # ----------------------------------------------------------------------------------- # Nsigma = [0.0, 0.5, 1.0, 1.5, 2.0, 3.0] # We choose six signal-background separations # Based on the above, define the means and widths of the signal and background Gaussians: # I choose the widths to be unit, and the means to move symmetrically around 5.0: # Nsigma = (mean_sig - mean_bkg) / sqrt(width_sig^2 + width_bkg^2) = dmean / sqrt(2) mean_sig = [] mean_bkg = [] for isig in range ( len(Nsigma) ) : mean_sig.append(5.0 + Nsigma[isig] * sqrt(2.0) / 2.0) mean_bkg.append(5.0 - Nsigma[isig] * sqrt(2.0) / 2.0) # Produce histograms accordingly: Nentries = 10000 hist_sig = [] hist_bkg = [] graph_ROC = [] for isig in range ( len(Nsigma) ) : name = "hist_sig_Nsigma" + str(isig) # To give each histogram a different name hist_sig.append(TH1D(name, ";Separating variable;Frequency", 100, 0.0, 10.0)) name = "hist_bkg_Nsigma" + str(isig) # To give each histogram a different name hist_bkg.append(TH1D(name, ";Separating variable;Frequency", 100, 0.0, 10.0)) for ientry in range ( Nentries ) : hist_sig[isig].Fill( r.Gaus(mean_sig[isig]), 1.0 ) hist_bkg[isig].Fill( r.Gaus(mean_bkg[isig]), 1.0 ) # Make ROC curve from signal and background histograms: graph_ROC.append(CalcROC(hist_sig[isig], hist_bkg[isig])) # ----------------------------------------------------------------------------------- # # Make nice plot showing histograms inside ROC curve plot: # ----------------------------------------------------------------------------------- # canvas2 = TCanvas("canvas2", "", 20, 20, 900, 800) frame2 = canvas2.DrawFrame(0.0, 0.0, 1.0, 1.0) # I define a frame inside the canvas # frame2.SetTitle("Illustration of distributions and corresponding ROC curves") pads = [] xpads = [0.13, 0.32, 0.51, 0.13, 0.32, 0.13] ypads = [0.68, 0.68, 0.68, 0.50, 0.50, 0.32] xsize = 0.21 ysize = 0.19 xtext = [0.18, 0.32, 0.49, 0.66, 0.77, 0.85] ytext = [0.24, 0.22, 0.20, 0.18, 0.16, 0.14] for isig in range ( len(Nsigma) ) : # Draw ROC curves: canvas2.cd() graph_ROC[isig].SetLineColor(kMagenta) graph_ROC[isig].SetLineWidth(2) graph_ROC[isig].Draw("same") text.SetTextSize(0.035) text.DrawLatex(xtext[isig], ytext[isig], "%3.1f#sigma"%(Nsigma[isig])) # Draw corresponding histogram in its own pad: name = "pad_Nsigma" + str(isig) # To give each pad a different name pads.append(TPad(name, "", xpads[isig], ypads[isig], xpads[isig]+xsize, ypads[isig]+ysize, 0, 0, 0)) pads[-1].SetTopMargin(0.0); pads[-1].SetLineColor(0); pads[-1].SetFillColor(0); pads[-1].GetFrame().SetFillColor(0); pads[-1].GetFrame().SetBorderSize(0); pads[-1].GetFrame().SetLineColor(kWhite); pads[-1].Draw(); pads[-1].cd(); hist_sig[isig].SetLineColor(kRed) hist_sig[isig].SetLineWidth(3) hist_sig[isig].Draw("") hist_bkg[isig].SetLineColor(kBlue) hist_bkg[isig].SetLineWidth(3) hist_bkg[isig].Draw("same") # Text: text.SetTextSize(0.17) text.DrawLatex(0.63, 0.85, "%3.1f#sigma"%(Nsigma[isig])) canvas2.Update() if (SavePlots) : canvas2.SaveAs("ROCcurves_GaussianSeparations.pdf") raw_input( ' ... ' )