#!/usr/bin/env python # ----------------------------------------------------------------------------------- # # Python script for analysis of ATLAS test beam data. # # The program reads an ASCII (i.e. text) data file, containting a large number of events, # where a charged particle (electron or pion) passed through a slice of the ATLAS detector. # Each passage is recorded by different detectors, boiling down to eleven numbers (some more # relevant than others). The exercise is to separate electron and pion events based on these # numbers, and in turn us this information to measure the interaction of pions and electrons # seperately. # # NOTE: Though the data is from particle physics, it could in principle have been from ANY # other source, and the eleven numbers could for example have been indicators of cancer, # key numbers for investors, or index numbers for identifying potential costumors. # # For more information on ATLAS test beam: # http://www.nbi.dk/~petersen/Teaching/Stat2017/TestBeam/TestBeamDataAnalysis.html # # Author: Troels C. Petersen (NBI) # Email: petersen@nbi.dk # Date: 10th of December 2017 # # ----------------------------------------------------------------------------------- # from __future__ import division, print_function import numpy as np import matplotlib.pyplot as plt plt.close('all') # ----------------------------------------------------------------------------------- # # Read the data file: # ----------------------------------------------------------------------------------- # # Write extensive output verbose = True Nverbose = 10 Nread = 0 # Lists of the eleven variables: Cher_electrons = [] nLT_electrons = [] nHT_electrons = [] EM0_electrons = [] EM1_electrons = [] EM2_electrons = [] EM3_electrons = [] Had0_electrons = [] Had1_electrons = [] Had2_electrons = [] Muon_electrons = [] # Open file f = open('DataSet_AtlasPid_ElectronPion_2GeV.txt', 'r') # Loop over lines and extract variables of interest header = f.readline() for line in f: line = line.strip() # Remove spaces, tabs, and other 'whitespace' from the line from both ends until a character is met columns = line.split() # Split the line at whitespace character (spaces, tabs, etc.) into a list Cher_i = float(columns[ 0]) nLT_i = int (columns[ 1]) nHT_i = int (columns[ 2]) EM0_i = float(columns[ 3]) EM1_i = float(columns[ 4]) EM2_i = float(columns[ 5]) EM3_i = float(columns[ 6]) Had0_i = float(columns[ 7]) Had1_i = float(columns[ 8]) Had2_i = float(columns[ 9]) Muon_i = float(columns[10]) #if ( EM1_i > 0.3 ) : # electrons? Cher_electrons.append(Cher_i) nLT_electrons.append(nLT_i) nHT_electrons.append(nHT_i) EM0_electrons.append(EM0_i) EM1_electrons.append(EM1_i) EM2_electrons.append(EM2_i) EM3_electrons.append(EM3_i) Had0_electrons.append(Had0_i) Had1_electrons.append(Had1_i) Had2_electrons.append(Had2_i) Muon_electrons.append(Muon_i) if (verbose and Nread < Nverbose) : print("Cher: {:6.1f} nLT,nHT: {:2d} {:2d} EM: {:5.2f} {:5.2f} {:5.2f} {:5.2f} Had: {:5.2f} {:5.2f} {:5.2f} Muon: {:5.1f}".format(Cher_i, nLT_i, nHT_i, EM0_i, EM1_i, EM2_i, EM3_i, Had0_i, Had1_i, Had2_i, Muon_i)) Nread += 1 f.close() print(" Total number of events read: {:d}".format(Nread)) Cher_electrons = np.array(Cher_electrons) nLT_electrons = np.array(nLT_electrons) nHT_electrons = np.array(nHT_electrons) EM0_electrons = np.array(EM0_electrons) EM1_electrons = np.array(EM1_electrons) EM2_electrons = np.array(EM2_electrons) EM3_electrons = np.array(EM3_electrons) Had0_electrons = np.array(Had0_electrons) Had1_electrons = np.array(Had1_electrons) Had2_electrons = np.array(Had2_electrons) Muon_electrons = np.array(Muon_electrons) # numpy version: # data = np.loadtxt('DataSet_AtlasPid_ElectronPion_2GeV.txt', skiprows=1, unpack=True) # [Cher_electrons, nLT_electrons, nHT_electrons, EM0_electrons, # EM1_electrons, EM2_electrons, EM3_electrons, Had0_electrons, # Had1_electrons, Had2_electrons, Muon_electrons] = data # ----------------------------------------------------------------------------------- # # Your analysis: # ----------------------------------------------------------------------------------- # # Hist_Cher, Hist_Cher_edges = np.histogram(Cher, bins=200, range=(400, 1400)) # Hist_EM1, Hist_EM1_edges = np.histogram(EM1, bins=200, range=(-0.5, 2.0)) # Hist_CherEM1, Hist_CherEM1_xedges, Hist_CherEM1_yedges = np.histogram2d(Cher, EM1, bins=(100,100), range=[(400,1400),(-0.5,2.0)]) # Make a figure with 2*2=4 subplots fig, ax = plt.subplots(2,2,figsize=(12, 8)) ax[0,0].hist(Cher_electrons, bins=200, range=(400,1400), histtype='step', label='Cherenkov') ax[0,0].set_xlabel("Cherenkov") ax[0,0].set_ylabel("Frequency") ax[1,0].hist(EM1_electrons , bins=200, range=(-0.5,2.0), histtype='step', label='EM1') ax[1,0].set_xlabel("EM1") ax[1,0].set_ylabel("Frequency") # binx, biny, x1, x2, y1, y2 or "binary" h = ax[0,1].hist2d(Cher_electrons, EM1_electrons, (100,100), ((400,1400),(-0.5,2.0)), cmap="Reds") plt.colorbar(h[3],ax=ax[0,1]) # z-scale on the right of the figure ax[0,1].set_xlabel("Cherenkov") ax[0,1].set_ylabel("EM1") # You can add your own fourth plot fig.delaxes(ax[1,1]) plt.tight_layout() plt.show(block=False) # Finally, ensure that the program does not termine (and the plot disappears), before you press enter: try: __IPYTHON__ except: raw_input('Press Enter to exit') # ----------------------------------------------------------------------------------- # # Questions to be answered: # ----------------------------------------------------------------------------------- # # # Generally, this analysis is about separating electrons and pions (and determining how # well this can be done), followed by a few questions characterizing the detector # response to each type of particle. # Below are questions guiding you, some/most of which your analysis should cover, but # you do not have to follow them blindly (I've put "Optional" on those that are not # essential). # Start by considering the data, and get a feel for the typical range of each variable. # Plot the variables, both in 1D and also 2D! From considering these plots, # guess/estimate an approximate knowledge of how electrons and pions distribute # themselves in the variables above, and how to make a selection of these. # # As described on the webpage introducing the data, the three detectors, Cherenkov, # TRT and Calorimeters are each capable of separating electrons and pions. As they are # INDEPENDENT (three separate detectors), they may be used to cross check each other, # and this is what you should use, in fact the essential part of the analysis! # # # Questions: # ---------- # 1) Find for each of these three detector systems one variable (i.e. a combination), # which seem to separate electrons and pions best. For example, start with the # Cherenkov, which is only a single number, and assume/guess that the large peak # at low values is mainly from pions, while the upper broad peak is from electrons. # Now plot the TRT and Calorimeter distributions when the Cherenkov selects a pion # and afterwards an electron. This should give you a hint of how to separate pions # and electrons using the TRT and Calorimeters. # # Hint: Sometimes variables from a single detector are more powerful, when they are # combined, e.g. taken ratios of. For the TRT this may be somewhat obvious, but for # the EMcalo, it is not as simple. While one may simply use the second layer, # involving this layer in a ratio may enhance the separation power. # # 2) Next you should try to see, if you can make a selection, which gives you a # fairly large and clean electron and pion sample, respectively. The question is, how # can you know how clean your sample is and how efficient your selection is? # This can actually be measured in the data itself, using the fact that there are # three independent detectors. For example, start by making an electron and a pion # selection using two of the three variables, and plot the third variable for each of # these selections. Now you can directly see, how electrons and pions will distribute # themselves in this third variable. Are you worried, that there are pions in your # electron sample, and vice versa? Well, there will probably be, but so few, that it # won't matter too much, at least not to begin with. # Why? Well, let us assume that for each detector, 80% of electrons pass your # requirement, but also 10% of pions do. Assuming an even number of electrons and # pions (which is not really the case), then with two detector cuts, you should get # a sample, which is: 0.8*0.8 / (0.8*0.8 + 0.1*0.1) = 98.5% pure. # # Now with this sample based on cuts on the two other detectors, ask what fraction # of electrons and pions passes your electron selection. The fraction of electrons, # that are not selected as electrons will be your TYPE I errors, denoted alpha, # while the fraction of pions, that do pass the cut will be your TYPE II errors, # denoted beta. # Measure these for each of the two cuts in the three detector types, and ask yourself # if they are "reasonable", i.e. something like in the example above. If not, then # you should perhaps reconsider adjusting your cuts. # # By now, you should for each detector have 6 numbers: # - The electron cut value above which you accept an electron. # - The efficiency (i.e. 1-alpha) for electrons of this cut. # - The fake rate (i.e. beta) for pions of this cut. # - The pion cut value below which you accept a pion (may be same value as above!). # - The efficiency (i.e. 1-alpha) for pions of this cut. # - The fake rate (i.e. beta) for electrons of this cut. # # 3) Given the efficiencies and fake rates of each cut, try to combine these (again # assuming that they are independent) into knowledge of your sample purities and # also the total number of electrons and pions in the whole sample. Do the sum of # estimated electrons and pions added actually match the number of particles in total? # This is a good cross check! # # 4) If the number of pions was suddenly 1.000 that of elections, would you still be # able to get a sample of fairly pure (say 90% pure) electrons? And if so, what would # the efficiency for these electrons be? # # 5) One of the purposes of the testbeam what to measure the response of the TRT # detector to exactly electrons and pions. Consider for example only events that has # 33 TRT hits (i.e. nLT = 33). As the High-Threshold probability (i.e. probability of # passing the High-Threshold, given that the Low-Threshold was passed), is assumed to # be constant in the TRT detector (but quite different for electrons and pions), what # distribution should the number of High-Threshold hits (nHT) follow? And is that # really the case, both for electrons and pions? # # 6) Expanding on problem 2), try now to calculate ROC curves for each of the three # detectors. These are obtained by making a clean selection using the two other # detectors for electrons and pions seperately, and then integrating over these two # distributions, using the running (normalised) integral of each as x and y coordinate # in a (ROC) graph. # If you do not manage on your own, perhaps consider the ROC calculator example, # which is posted along with this exercise. # # (Optional) # 7) Still considering nLT = 33, and given that there are both electrons and pions in # the sample (each with a different HT probability), nHT should in the unselected data # be a combination of two distributions. Try to fit the number of HT hits with two # distributions combined. # Do they fit the data? And can this fit be used to estimate the fraction of pions and # electrons in the sample? And does that estimate match you previous estimate? # Perhaps retry with other values for the number of TRT hits. # # (Optional) # 8) Try to select pions using three different (mutually exclusive) techniques: # a) Passing only a hadronic calorimeter requirement (e.g. that the sum of the three # HCal values is above some minimum energy). # b) Passing only Cherenkov AND EMcalo requirements. # c) Passing both a) and b). # Try to measure the HT probability (i.e. fraction of High-Threshold hits) for each of # these three pion samples. Do they agree with each other? # # ----------------------------------------------------------------------------------- #