// ----------------------------------------------------------------------------------- //
/*
   ROOT macro for analysing measurements of the length of the lecture table in
   Auditorium A at NBI. There are two measurements of the table length:
     1: Measurement with a 30cm ruler.
     2: Measurement with a 2m folding ruler.
   Each person was asked not only to state the measurement, but also their estimated
   uncertainty. None of the persons could see the others' measurements, in order to
   get the largest degree of independence.

   Author: Troels C. Petersen (NBI)
   Email:  petersen@nbi.dk
   Date:   4th of September 2012
*/
// ----------------------------------------------------------------------------------- //

// Include from C++:
#include <iostream>
#include <fstream>

// Include from ROOT:
#include <TROOT.h>
#include <TStyle.h>
#include <TCanvas.h>
#include <TH1F.h>
#include <TF1.h>
#include <TFile.h>

//----------------------------------------------------------------------------------
// Simple example of defining your own function. Reading from the end, it returns a
// "double" (i.e. a number with (many) decimals), its name is "sqr", and as input
// it takes a "double". And between the {}, you define what it should do!
double sqr(double a) { return a*a; }

// SolutionLevel1:
// Before the first iteration, I decide not to really consider the errors, as I will
// not start with the more advanced weighted mean, but simply start with the simplest,
// namely checking the data and obtaining an initial result.

// ----------------------------------------------------------------------------------- //
// This is the main function. It does not return anything (void) and takes no input.
// It simply runs whatever is "inside".
void TableMeasurements() {

  // ----------------------------------------------------------------------------------- //
  // Set the showing of statistics and fitting results (0 means off, 1111 means all on):
  gROOT->Reset();
  gStyle->SetOptStat(111111);
  gStyle->SetOptFit(1111);

  // Set the graphics:
  gStyle->SetStatBorderSize(1);
  gStyle->SetStatFontSize(0.055);
  gStyle->SetCanvasColor(0);
  gStyle->SetPalette(1);

  int NeventsMax = 100;       // Maximum number of events to be read.
  int Nevents = 0;            // Counter of events read so far.
  char *lname, *fname;        // Last and first name (dummy; not read in this version).
  double x[4];                // Array of 4 numbers for storing the current measurements read.

  // Define three sums to be increased with each event for calculation of mean and width:
  // SolutionLevel1: Given that there are two types of measurements (each with an error),
  // I would start by defining all the sums, means, and widths as arrays of 2
  // (i.e. for the 30cm ruler and the 2m folding rule):
  const int Ndata = 2;
  double sum0[Ndata] = {0.0, 0.0};
  double sum1[Ndata] = {0.0, 0.0};
  double sum2[Ndata] = {0.0, 0.0};
  double mean[Ndata];
  double width[Ndata];

  // Define a histogram with a name (e.g. "Hist_By30cmR"), a title, 1000 bins,
  // starting at 0.0 and ending at 5.0 (thus a bin width of 0.005).
  // SolutionLevel1: By hand define two different histograms:
  TH1F* Hist_By30cmR = new TH1F("Hist_By30cmR", "Length estimates by 30cm ruler",      1000, 0.0, 5.0);
  TH1F* Hist_By2mFR  = new TH1F("Hist_By2mFR",  "Length estimates by 2m folding rule", 1000, 0.0, 5.0);

  ifstream dataset("data_TableMeasurements.txt");   // Open the data file.

  // Start the loop over data:
  // -------------------------
  if (dataset.is_open()) {   // If opening the file succeeded, then...

    // While the reading is not at the End-of-File (eof),
    // and the number of events does not exceed the maximum, do...
    while ((!dataset.eof()) && (Nevents < NeventsMax)) {

      // Read the dataset into the array "x" of length 4.
      dataset >> x[0] >> x[1] >> x[2] >> x[3];
      if (dataset.fail()) break;   // Stop if the read failed (e.g. a blank last line before eof).

      // SolutionLevel1: As there are 2 measurements and 2 errors, put these into arrays as well,
      // so that any analysis step can be applied to both data sets in a loop:
      double l[Ndata]  = {x[0], x[2]};
      double el[Ndata] = {x[1], x[3]};

      // Write out the first 5 events to the screen with some values.
      // Note how the slightly cryptic "%7.4f" refers to 7 characters with 4 digits
      // after the decimal point for a float (i.e. double), thus printing out the values nicely.
      if (Nevents < 5) printf(" Read: %6d   30cm ruler: %7.4f +- %6.4f   2m folding rule: %7.4f +- %6.4f \n",
                              Nevents, x[0], x[1], x[2], x[3]);

      // For every 10 events (here the "%" is the modulo, i.e. remainder, operator: 12%5 = 2), print progress.
      if (Nevents%10 == 0 && Nevents > 0) printf(" Read %6d events. \n", Nevents);

      // If the value read is good, use this measurement:
      // SolutionLevel1: Loop over measurements and get the sums for each of them (and put them into histograms).
      for (int i=0; i < Ndata; i++) {
        if (l[i] > 0.0) {
          sum0[i] += 1.0;
          sum1[i] += l[i];
          sum2[i] += sqr(l[i]);
        }
      }
      Hist_By30cmR->Fill(l[0]);   // Simply fill the measurement value into the histogram.
      Hist_By2mFR->Fill(l[1]);

      Nevents++;   // Increase the number of events read by one.
    }
  }

  // After reading the data, finish:
  dataset.close();   // Close the data file.
  printf(" Number of entries in total: %6d \n\n", Nevents);   // Write the total number of events read.

  // ----------------------------------------------------------------------------------- //
  // Calculate mean and width:
  // SolutionLevel1: Loop over sums to get means and widths:
  for (int i=0; i < Ndata; i++) {
    if (sum0[i] > 1.0) {   // Ensure that there is more than 1 event!
      mean[i]  = sum1[i] / sum0[i];
      width[i] = sqrt(sum2[i] / sum0[i] - sqr(mean[i]));
      printf(" Data %1d.  Mean = %7.5f   Width = %7.5f \n", i, mean[i], width[i]);
    } else {
      printf(" Error: Too little data to calculate mean and width! %1d  Nevents = %3d \n", i, int(sum0[i]));
    }
  }

  // ----------------------------------------------------------------------------------- //
  // Plot histograms on the screen:

  // Make canvas:
  // SolutionLevel1: Plot both distributions:
  TCanvas* canvas = new TCanvas("canvas", "", 250, 20, 800, 600);   // Make a new window.
  canvas->Divide(1,2);   // Divide it into two pads.
  canvas->cd(1);         // Go to the first (upper) pad.

  // Perhaps set the range shown:
  // Hist_By30cmR->GetXaxis()->SetRangeUser(3.2, 3.7);      // Define the X-range.
  // Perhaps also fit it with a Gaussian:
  // TF1* func_g1 = new TF1("func_g1", "gaus", 3.2, 3.5);   // Define a Gaussian function.
  // Hist_By30cmR->Fit("func_g1", "LR");                    // Fit histogram with function.
  Hist_By30cmR->Draw();   // Draw the histogram.

  canvas->cd(2);
  // Perhaps set the range shown:
  // Hist_By2mFR->GetXaxis()->SetRangeUser(3.2, 3.7);       // Define the X-range.
  // Perhaps also fit it with a Gaussian:
  // TF1* func_g2 = new TF1("func_g2", "gaus", 3.2, 3.7);   // Define a Gaussian function.
  // Hist_By2mFR->Fit("func_g2", "LR");                     // Fit histogram with function.
  Hist_By2mFR->Draw();    // Draw the histogram.
  canvas->Update();
  // canvas->SaveAs("LengthOfTable.png");   // Alternative formats: jpg, eps, pdf, etc!

  // ----------------------------------------------------------------------------------- //
  // Write histograms to file:
  TFile *file = new TFile("Histograms.root", "RECREATE", "Root Histograms");
  Hist_By30cmR->Write();
  Hist_By2mFR->Write();
  file->Write();
  file->Close();
}

//----------------------------------------------------------------------------------
/*
  Start by taking a close look at the data, both by inspecting the numbers, and then
  by considering the histograms produced by running the macro.

  Questions:
  ----------
  1) Consider the calculated means and widths, and make sure that you understand how
     the calculations were performed. Now add a calculation of the uncertainty on the
     mean. Do the two means agree well?

  2) Do any of the measurements look wrong/bad/suspicious?
     Do you see any repeated mistakes? Would you correct or exclude any of the
     measurements, and how would you justify this? If so, make sure that your program
     does this, or make a copy of the data file and make the corrections there.

  3) Given the (possibly) updated dataset, fit the length measurements with a Gaussian
     distribution, both for the 30cm and the 2m ruler cases. Should the binning be
     changed for these fits? And is the Gaussian distribution justified?

  4) Given two successful fits, the widths of these Gaussians show the actual
     uncertainty in the measurements. How do they compare to the uncertainties
     estimated by the people making the measurements? Were people optimistic or
     the contrary?

  5) How much did the single-measurement uncertainty go down from the 30cm ruler case
     to the 2m folding rule? Can you quantify how much better the 2m folding rule is?

  6) Is there any correlation between the errors on the measurements and the actual
     distance to the mean? That is, do the errors contain useful information that
     should be used in the calculation of the most accurate estimate of the mean?

  7) Try to calculate the weighted mean using the given uncertainties. How much
     difference does this make? Does it improve the precision?

  Advanced questions:
  -------------------
  1) Does the length of the table seem to be different when measured with a 30cm and
     a 2m ruler? Quantify this statement! I.e. what is the difference, and what is
     the uncertainty on that difference? Is this significantly away from 0? How big
     an effect on the 30cm ruler would this correspond to?

  2) If you were asked for the best estimate of the length of the table, what would
     you do? (If possible, read Bevington page 58!)
*/
//----------------------------------------------------------------------------------