#!/usr/bin/env python
# ----------------------------------------------------------------------------------- #
#
#  Python/pyROOT macro for applying TMVA classifiers on a dataset.
#  Specify input, training variables etc. at the top of the script before running it
#  in a shell.
#  Assumes that the TrainClassifier.py script has been run first.
#
#  Runs under both Python 2 and Python 3.
#
#  Authors: Lars Egholm Pedersen & Troels Petersen (NBI)
#  Date:    7th of December 2016
#
# ----------------------------------------------------------------------------------- #

import ROOT
from array import array

# Python 2 calls the line-reading builtin raw_input, Python 3 calls it input.
try:
    read_line = raw_input
except NameError:
    read_line = input


# ------------------------------------------------------------
# Setup input and training parameters here
# ------------------------------------------------------------

# Use ascii or .root input
read_ascii = False

# Path prefix to TMVA weight files created by the TrainClassifier.py script.
# There should be some file like [weightprefix]_BDTG.weights.xml
weightprefix = "weights/TMVAClassification"

# Path to signal and background files
# (may be the same, in which case the difference is specified by the cuts below)
sigpath = "./SimpleDataset.root"
bkgpath = "./SimpleDataset.root"

# If reading from a ROOT file, specify names of signal and background trees
# (may be different)
sigtree_name = "ntuple"
bkgtree_name = "ntuple"

# Otherwise, specify path to ascii files:
#sigpath = "./SimpleDataset.txt"
#bkgpath = "./SimpleDataset.txt"

# Define cut/requirements specifying whether a point is signal or background
sigcut = "isSignal > 0.5"
bkgcut = "isSignal < 0.5"

# Name of variables
varlist = ["x", "y"]

# Define list of methods that you have trained in the previous script (given by titles)
methodlist = ["BDTA", "Fisher"]


# ------------------------------------------------------------
# Helper functions
# ------------------------------------------------------------

def open_input(path, tree_name):
    """Open a ROOT file read-only and fetch a tree from it.

    Returns the tuple (file, tree). Raises IOError with a readable message if
    the file cannot be opened or holds no object called tree_name, instead of
    failing later on a null object.
    """
    infile = ROOT.TFile(path, "READ")
    if infile.IsZombie():
        raise IOError("Could not open ROOT file '%s'" % path)
    tree = infile.Get(tree_name)
    if not tree:
        raise IOError("No tree named '%s' found in '%s'" % (tree_name, path))
    return infile, tree


def process_tree(label, tree, reader, vardict, varlist, methodlist):
    """Loop over all entries of tree and print the classifier responses.

    label      -- "Signal" or "Background"; only used in the printed messages
    tree       -- TTree holding one branch per name in varlist
    reader     -- TMVA reader with the variables and methods already booked
    vardict    -- name -> one-element array shared between tree and reader
    varlist    -- variable names, in the order they were given to the reader
    methodlist -- titles of the booked methods

    The buffers in vardict are reset to 0.0 before returning.
    """
    print("%s processing" % label)

    # Let the tree write each entry into the buffers the reader evaluates from.
    # NOTE: the buffers are single-precision ('f'), so the branches are assumed
    # to be stored as floats -- verify against the input file.
    for ivar in varlist:
        tree.SetBranchAddress(ivar, vardict[ivar])

    # Loop events
    for ientry in range(tree.GetEntries()):

        # Retrieve data and check if valid
        if tree.GetEntry(ientry) < 0:
            print("Error reading %s tree, breaking" % label)
            break

        # ----------------------------------
        # Your analysis would go here!!!
        # ----------------------------------
        for ivar in varlist:
            print(vardict[ivar][0])

        for imeth in methodlist:
            print("%s %s" % (imeth, reader.EvaluateMVA(imeth)))

        # Or just access by index
        print(reader.EvaluateMVA(methodlist[0]))
        print("")

    # Do not leave the last event behind in the shared buffers
    for ivar in varlist:
        vardict[ivar][0] = 0.0


# ------------------------------------------------------------
# Section for setting up TMVA weight reader
# ------------------------------------------------------------

reader = ROOT.TMVA.Reader()

# Define a dictionary of variables vardict["name"] = value such that it can be
# accessed by the same name as in the root file or ascii header.
vardict = {ivar: array('f', [0.0]) for ivar in varlist}

# Connect variables to reader
for ivar in varlist:
    #                  name, array that will contain values
    reader.AddVariable(ivar, vardict[ivar])

# Tell the reader which methods you want to use:
for imeth in methodlist:
    # Make sure the weightprefix and method names match whatever is in ./weights
    reader.BookMVA(imeth, weightprefix + "_" + imeth + ".weights.xml")


# ------------------------------------------------------------
# Section where classifiers are used on datasets
# ------------------------------------------------------------

# In this script the ascii file is converted to a ROOT TTree to make it easier to
# switch back and forth between ROOT and ascii.
# But otherwise there is no real reason to not just read the ascii file directly
# before filling the arrays.
if read_ascii:
    # No ROOT files are involved in this mode
    sigfile = None
    bkgfile = None

    sigsource = ROOT.TTree()
    bkgsource = ROOT.TTree()
    sigsource.ReadFile(sigpath)
    bkgsource.ReadFile(bkgpath)
else:
    sigfile, sigsource = open_input(sigpath, sigtree_name)
    bkgfile, bkgsource = open_input(bkgpath, bkgtree_name)

# Impose the selection criteria in both input modes; without this the signal
# and background passes would run over identical events whenever the two
# paths point at the same file.
sigtree = sigsource.CopyTree(sigcut)
bkgtree = bkgsource.CopyTree(bkgcut)

# ----------------------------------
# Signal file processing
# ----------------------------------
process_tree("Signal", sigtree, reader, vardict, varlist, methodlist)
if sigfile is not None:
    sigfile.Close()

# ----------------------------------
# Background file processing
# ----------------------------------
process_tree("Background", bkgtree, reader, vardict, varlist, methodlist)
if bkgfile is not None:
    bkgfile.Close()


# ------------------------------------------------------------
# Section where classifiers are evaluated as function of variables
# Only works for the example of two variables
# ------------------------------------------------------------
print("Printing classifier scores")

# Loop in 11 steps from -3.0 to 3.0 (both end points included)
nsteps = 11
gridpoints = [-3.0 + 6.0 * istep / (nsteps - 1) for istep in range(nsteps)]

for xvar in gridpoints:
    for yvar in gridpoints:

        # Analysis here !!!
        vardict["x"][0] = xvar
        vardict["y"][0] = yvar

        for imeth in methodlist:
            print("%s gave (%3.1f, %3.1f) a score of %4.2f"
                  % (imeth, xvar, yvar, reader.EvaluateMVA(imeth)))
        print("")

read_line(' Press any key to exit ')