#!/usr/bin/env python
# ----------------------------------------------------------------------------------- #
#
#  ROOT macro for illustrating the importance of VISUAL INSPECTION of a dataset.
#
#  The example is called "Anscombe's Quartet" (F.J. Anscombe, "Graphs in Statistical
#  Analysis," American Statistician, 27 [February 1973], 17-21) and consists of four
#  datasets, which have the same:
#   - Mean of each x variable                     9.0
#   - Variance of each x variable                10.0
#   - Mean of each y variable                     7.5
#   - Variance of each y variable                 3.75
#   - Correlation between each x and y variable   0.816
#   - Linear regression line                      y = 3 + 0.5x
#
#  However, they are very different! For more information on Anscombe's Quartet, see:
#    http://en.wikipedia.org/wiki/Anscombe's_quartet
#
#  Author: Troels C. Petersen (NBI)
#  Email:  petersen@nbi.dk
#  Date:   22nd of August 2014
#
# ----------------------------------------------------------------------------------- #

from array import array

# Explicit names instead of "from ROOT import *": only these ROOT objects are used
# below, and naming them keeps the script importable under Python 3 as well.
from ROOT import gROOT, gStyle, TCanvas, TF1, TGraph, kBlue, kRed

# ----------------------------------------------------------------------------------
# Run by ./AnscombesQuartet.py
# Output: AnscombesQuartet.pdf (only written if SavePlots is True)
# ----------------------------------------------------------------------------------

# raw_input() only exists in Python 2; fall back to input() on Python 3 so that the
# final "press enter" prompt works with either interpreter.
try:
    wait_for_enter = raw_input
except NameError:
    wait_for_enter = input

gROOT.Reset()

# Set the showing of statistics and fitting results (0 means off, 1111 means all on):
gStyle.SetOptStat(1111)
# gStyle.SetOptStat(0)
gStyle.SetOptFit(1111)
# gStyle.SetOptFit(0)

# Position of the box with statistics and fitting results:
# gStyle.SetStatX(0.52)          # Top left corner.
# gStyle.SetStatY(0.86)
gStyle.SetStatX(0.89)            # Bottom right corner.
gStyle.SetStatY(0.33)

# Set the graphics:
gStyle.SetStatBorderSize(1)
gStyle.SetStatFontSize(0.055)
gStyle.SetCanvasColor(4)
gStyle.SetPalette(1)

SavePlots = False                # Set to True to write the canvas to a PDF file


# ------------------------------------------------------------------ #
# Get data in arrays:
# ------------------------------------------------------------------ #

# Define data samples.
# ROOT objects want arrays with a specified data type (here floats, 'f').
x = [
    array('f', [10.0, 8.0, 13.0, 9.0, 11.0, 14.0, 6.0, 4.0, 12.0, 7.0, 5.0]),
    array('f', [10.0, 8.0, 13.0, 9.0, 11.0, 14.0, 6.0, 4.0, 12.0, 7.0, 5.0]),
    array('f', [10.0, 8.0, 13.0, 9.0, 11.0, 14.0, 6.0, 4.0, 12.0, 7.0, 5.0]),
    array('f', [8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 19.0, 8.0, 8.0, 8.0]),
]
y = [
    array('f', [8.04, 6.95, 7.58, 8.81, 8.33, 9.96, 7.24, 4.26, 10.84, 4.82, 5.68]),
    array('f', [9.14, 8.14, 8.74, 8.77, 9.26, 8.10, 6.13, 3.10, 9.13, 7.26, 4.74]),
    array('f', [7.46, 6.77, 12.74, 7.11, 7.81, 8.84, 6.08, 5.39, 8.15, 6.42, 5.73]),
    array('f', [6.58, 5.76, 7.71, 8.84, 8.47, 7.04, 5.25, 12.50, 5.56, 7.91, 6.89]),
]

# Derived from the data above so the counts cannot get out of sync with the arrays.
Ndatasets = len(x)               # 4 datasets
Npoints = len(x[0])              # 11 points in each dataset


# ------------------------------------------------------------------ #
# Fit data:
# ------------------------------------------------------------------ #

# Create a list of TGraphs. TGraphs are initialized by (number of points, x-coord, y-coord).
graph = [TGraph(len(xs), xs, ys) for xs, ys in zip(x, y)]

# Make a linear function in the range [3.5, 20.0].
# Alternatively to "[0]*x + [1]", one could have written "pol1", a predefined ROOT
# function (note that "pol1" is [0] + [1]*x, i.e. the two parameters swap roles).
fit_p1 = TF1("fit_p1", "[0]*x + [1]", 3.5, 20.0)
fit_p1.SetParameters(1.0, 1.0)   # Set its (initial) parameters

# The same function object is reused for all four fits, so its drawing style only
# needs to be set once, before the loop.
fit_p1.SetLineColor(kBlue)       # Set function line color
fit_p1.SetLineWidth(2)           # Set function line width

# Make canvas:
canvas = TCanvas("canvas", "", 650, 20, 800, 600)   # Make a new window
canvas.SetFillColor(0)                               # Make it white
canvas.Divide(2, 2)                                  # Divide it into a 2x2 window

# Fit and plot graphs (pads in a divided canvas are numbered from 1):
for ipad, dataset_graph in enumerate(graph, 1):      # Loop over data sets
    canvas.cd(ipad)                                  # Point at the relevant window in canvas

    dataset_graph.SetMarkerColor(kRed)               # Set data marker color
    dataset_graph.SetMarkerStyle(20)                 # Set data marker type
    dataset_graph.SetMarkerSize(1.0)                 # Set data marker size

    dataset_graph.Fit("fit_p1", "R")                 # Fit data with function in defined range
    dataset_graph.Draw("AP")                         # Draw data (Axis and Points) in window

canvas.Update()                                      # Make sure everything is in!
if SavePlots:
    canvas.SaveAs("AnscombesQuartet.pdf")            # Print window to file

# Tell python to wait here until you do something (otherwise the window closes at once):
wait_for_enter(' ... Press enter to exit ... ')


# ----------------------------------------------------------------------------------
#
# First acquaint yourself with the program, and get yourself a "free" (hopefully not first!)
# look at how ROOT works. Understand that each of the four distributions is being fitted
# with a linear function (here called "fit_p1") and the results plotted. There are comments
# for most lines in the macro!
#
# Run the macro, and then take a close look at each of the four results.
#
#
# Questions:
# ----------
#  1) Looking closely at each of the four fits, determine which points give the largest
#     contribution to the "mismatch" (that is chi-square) between the data and the fit.
#
#  2) Consider how YOU would treat each of the four datasets and fit them!
#
#
# Advanced questions:
# -------------------
#  1) How would you with (smarter) statistical techniques detect that something was
#     not right without looking?
#
#
# ----------------------------------------------------------------------------------
#
# Key Questions:
# --------------
# https://docs.google.com/forms/d/1GMQ6H99IyONWZHTuCw_ExBKpQrVAtqqoH9QnT3pvWwE/viewform
#  * Which scenario looks most like real data?
#  * No uncertainties are used, but assuming that they are all of the same size, you
#    can infer them from the Chi2. What are they?
#
# ----------------------------------------------------------------------------------