// ----------------------------------------------------------------------------------- // /* ROOT macro for illustrating the importance of VISUAL INSPECTION of a dataset. The example is called "Anscombe's Quartet" (F.J. Anscombe, "Graphs in Statistical Analysis," American Statistician, 27 [February 1973], 17-21) and consists of four datasets, which have the same: - Mean of each x variable 9.0 - Variance of each x variable 10.0 - Mean of each y variable 7.5 - Variance of each y variable 3.75 - Correlation between each x and y variable 0.816 - Linear regression line y = 3 + 0.5x However, they are very different! For more information on Anscombe's Quartet, see: http://en.wikipedia.org/wiki/Anscombe's_quartet Author: Troels C. Petersen (NBI/CERN) Email: Troels.Petersen@cern.ch Date: 26th of August 2010 */ // ----------------------------------------------------------------------------------- // //---------------------------------------------------------------------------------- // Run in ROOT by: .x AnscombesQuartet.c // Output: AnscombesQuartet.eps //---------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------- // void AnscombesQuartet() { // ----------------------------------------------------------------------------------- // gROOT->Reset(); // Set the showing of statistics and fitting results (0 means off, 1111 means all on): gStyle->SetOptStat(1111); // gStyle->SetOptStat(0); gStyle->SetOptFit(1111); // gStyle->SetOptFit(0); // Statistics and fitting results replaced in: // gStyle->SetStatX(0.52); // Top left corner. // gStyle->SetStatY(0.86); gStyle->SetStatX(0.89); // Bottom right corner. gStyle->SetStatY(0.33); // Set the graphics: gStyle->SetStatBorderSize(1); gStyle->SetStatFontSize(0.055); gStyle->SetCanvasColor(4); gStyle->SetPalette(1); // ------------------------------------------------------------------ // // Get data in arrays: // ------------------------------------------------------------------ // const int Ndatasets = 4; const int Npoints = 11; double x[Ndatasets][Npoints] = {{10.0, 8.0, 13.0, 9.0, 11.0, 14.0, 6.0, 4.0, 12.0, 7.0, 5.0}, {10.0, 8.0, 13.0, 9.0, 11.0, 14.0, 6.0, 4.0, 12.0, 7.0, 5.0}, {10.0, 8.0, 13.0, 9.0, 11.0, 14.0, 6.0, 4.0, 12.0, 7.0, 5.0}, { 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 19.0, 8.0, 8.0, 8.0}}; double y[Ndatasets][Npoints] = {{ 8.04, 6.95, 7.58, 8.81, 8.33, 9.96, 7.24, 4.26, 10.84, 4.82, 5.68}, { 9.14, 8.14, 8.74, 8.77, 9.26, 8.10, 6.13, 3.10, 9.13, 7.26, 4.74}, { 7.46, 6.77, 12.74, 7.11, 7.81, 8.84, 6.08, 5.39, 8.15, 6.42, 5.73}, { 6.58, 5.76, 7.71, 8.84, 8.47, 7.04, 5.25, 12.50, 5.56, 7.91, 6.89}}; // ------------------------------------------------------------------ // // Fit data: // ------------------------------------------------------------------ // // Make graphs: TGraph* graph[Ndatasets]; // Make 4 graphs for (int idataset=0; idataset < Ndatasets; idataset++) { // Put data into them graph[idataset] = new TGraph(Npoints, x[idataset], y[idataset]); } TF1 *fit_p1 = new TF1("fit_p1", "[0]*x + [1]", 3.5, 20.0); // Make a linear function fit_p1->SetParameters(1.0, 1.0); // Set its parameters // Make canvas: canvas = new TCanvas("canvas","",650,20,600,450); // Make a new window canvas->SetFillColor(0); // Make it white canvas->Divide(2,2); // Divide it into a 2x2 window // Fit and plot graphs: for (int idataset=0; idataset < Ndatasets; idataset++) { // Loop over data sets canvas->cd(idataset+1); // Point at the relevant window fit_p1->SetLineColor(4); // Set function line color! fit_p1->SetLineWidth(2); // Set function line width! graph[idataset]->SetMarkerColor(2); // Set data marker color graph[idataset]->SetMarkerStyle(20); // Set data marker type graph[idataset]->SetMarkerSize(1.0); // Set data marker size graph[idataset]->Fit("fit_p1","r"); // Fit data with function graph[idataset]->Draw("AP"); // Draw data in window } canvas->Update(); // Make sure everything is in! canvas->SaveAs("AnscombesQuartet.eps"); // Print window to file } //---------------------------------------------------------------------------------- /* First acquaint yourself with the program, and get yourself a "free" (possibly first) look at how ROOT works. Understand that each of the four distributions are being fitted with a linear function (here called "fit_p1") and the results plottet. There are comments for most lines in the macro! Run the macro, and then take a close look at each of the four results. Questions: ---------- 1) Looking closely at each of the four fits, determine which points gives the largest contribution to the "mismatch" (which is chi-square) between the data and the fit. 2) Consider how YOU would treat each of the four datasets and fit them! Advanced questions: ------------------- 1) How would you with (smarter) statistical techniques detect that something was not right? */ //----------------------------------------------------------------------------------