// ----------------------------------------------------------------------------------- //
/*
  ROOT macro for analysing measurements of the length of the lecture table in Auditorium
  A at NBI.

  The measurements were done first with a 30cm ruler, and then a 2m folding ruler, and
  each person was asked not only to state the measurement, but also their estimated
  uncertainty. None of the persons could see previous measurements in order to get the
  largest degree of independence.

  The data sample also includes the measurements from last years course, giving a chance
  to compare with a completely independent measurement.

  Author: Troels C. Petersen (NBI/CERN)
  Email:  Troels.Petersen@cern.ch
  Date:   7th of September 2010
*/
// ----------------------------------------------------------------------------------- //


// Defining a new function (for taking the square):
// ----------------------------------------------------------------------------------- //
double sqr(double a) {
  return a*a;
}


// ----------------------------------------------------------------------------------- //
void TableMeasurements_Solution() {
// ----------------------------------------------------------------------------------- //
  gROOT->Reset();

  // Set the showing of statistics and fitting results (0 means off, 1111 means all on):
  gStyle->SetOptStat(1111);
  gStyle->SetOptFit(1111);

  // Set the graphics:
  gStyle->SetStatBorderSize(1);
  gStyle->SetStatFontSize(0.055);

  gStyle->SetFillColor(0);
  gStyle->SetCanvasColor(0);
  gStyle->SetPadColor(0);
  gStyle->SetPalette(1);


  // ------------------------------------------------------------------ //
  // Get data in arrays:
  // ------------------------------------------------------------------ //

  // Note: The naming scheme is that the "estimates by eye" samples (not included here)
  //       are called "a" and "b", and therefore the 30cm and 2m stick measurements are
  //       dubbed "c" and "d". "L" and "E" denotes "Length" and "Error", respectively,
  //       and the two numbers refere to the year they were made.

  const int Nmeas09 = 17;

  double Lc09[Nmeas09] =
    {335.5, 325.0, 306.2, 335.9, 335.5, 335.2, 336.0, 335.9, 335.0, 335.0, 336.0, 335.6, 335.2, 335.7, 317.0, 335.0, 335.6};
  double Ec09[Nmeas09] =
    {  0.5,   5.0,   1.1,   0.7,   3.0,   1.5,   1.0,   0.2,   1.0,   0.5,   4.0,   0.6,   3.0,   3.0,   0.8,   1.1,   1.0};

  double Ld09[Nmeas09] =
    {336.5, 336.5, 136.6, 336.3, 336.3, 136.5, 336.5, 336.1, 336.5, 335.4, 336.5, 336.6, 336.6, 336.4, 336.3, 336.3, 335.6};
  double Ed09[Nmeas09] =
    {  0.2,   0.1,   0.4,   0.4,   2.0,   1.0,   0.2,   0.07,  0.5,   0.5,   0.2,   0.2,   0.3,   0.3,   0.2,   0.3,   0.25};

  // Note: Third last measurement with a 30cm ruler was actually done with a 20cm ruler!


  const int Nmeas10 = 24;

  char* names[Nmeas10] =
    {"Troels", "Bjarke", "Bo F", "Martin", "Kjartan", "Heidi", "Sonja", "Tina", "Christoffer", "Kristoffer", "Peter", "Diana",
     "Silvia", "Lars", "Simon", "Logi", "Ninna", "Jeppe", "Kasper", "Song", "Alexander", "Rastin", "Dan", "Bo M"};

  // A copy of the original data is kept for record, such that any alterations can always be reconsidered!!!
  /*
  double Lc10[Nmeas10] =
    {336.5, 337.0, 336.1, 336.4, 340.0, 306.0, 336.5, 300.0, 340.0, 335.0, 335.0, 339.0, 340.0, 305.0, 336.7, 335.0, 340.0, 337.0, 330.0, 336.3, 336.0, 333.0, 367.0, 337.0};
  double Ec10[Nmeas10] =
    {  0.5,   0.1,  10.0,   1.0,  10.0,   5.0,   2.0,  10.0,  15.0,  12.0,   5.0,   2.0,  10.0,   5.0,   0.4,  10.0,  33.0,   2.0,   3.0,   1.2,   2.0,   2.0,   1.5,   1.0};
  double Ld10[Nmeas10] =
    {336.5, 336.0, 336.4, 336.0, 336.0, 306.0, 336.5, 335.0, 337.0, 336.5, 336.0, 336.0, 337.0, 335.0, 336.4, 337.0, 335.0, 336.0, 375.0, 336.4, 336.3, 336.0, 336.0, 336.5};
  double Ed10[Nmeas10] =
    {  0.1,   0.3,   0.3,   0.2,   5.0,   0.5,   1.0,   2.0,   5.0,   0.5,   1.0,   0.5,   5.0,   5.0,   0.1,   5.0,   5.0,   0.5,   2.0,   0.2,   0.2,   1.0,   1.0,   0.5};
  */
  // Note: Kasper thought that his second measurement was a bit wierd - he wasn't entirely sure of his calculation.

  // Data preparation:
  // The measurements with a clear error are corrected, while those suspected of being wrong are omitted in the following calculations (done by negating error!)
  double Lc10[Nmeas10] =
    {336.5, 337.0, 336.1, 336.4, 340.0, 336.0, 336.5, 330.0, 340.0, 335.0, 335.0, 339.0, 340.0, 335.0, 336.7, 335.0, 340.0, 337.0, 330.0, 336.3, 336.0, 333.0, 337.0, 337.0};
  double Ec10[Nmeas10] =
    {  0.5,   1.0,  10.0,   1.0,  10.0,   5.0,   2.0,  10.0,  15.0,  12.0,   5.0,   2.0,  10.0,   5.0,   0.4,  10.0,  33.0,   2.0,   3.0,   1.2,   2.0,   2.0,   1.5,   1.0};

  double Ld10[Nmeas10] =
    {336.5, 336.0, 336.4, 336.0, 336.0, 306.0, 336.5, 335.0, 337.0, 336.5, 336.0, 336.0, 337.0, 335.0, 336.4, 337.0, 335.0, 336.0, 375.0, 336.4, 336.3, 336.0, 336.0, 336.5};
  double Ed10[Nmeas10] =
    {  0.1,   0.3,   0.3,   0.2,   5.0,  -0.5,   1.0,   2.0,   5.0,   0.5,   1.0,   0.5,   5.0,   5.0,   0.1,   5.0,   5.0,   0.5,  -2.0,   0.2,   0.2,   1.0,   1.0,   0.5};


  // Print the data:
  printf("  Measurement:              With 30cm ruler       With 2m folding ruler \n");
  for (int i=0; i < Nmeas10; i++) {
    if ((Ec10[i] > 0.0) && (Ed10[i] > 0.0)) {
      printf("     %2d %12s       %6.1f +- %4.1f cm     %6.1f +- %3.1f cm \n",
	     i+1, names[i], Lc10[i], Ec10[i], Ld10[i], Ed10[i]);
    }
  }


  // Calculate averages, widths, and correlation (unweighted and weighted):
  // ----------------------------------------------------------------------
  double sumC[3] = {0.0, 0.0, 0.0};
  double sumD[3] = {0.0, 0.0, 0.0};
  double sumCD   = 0.0;

  double sumCw[3] = {0.0, 0.0, 0.0};
  double sumDw[3] = {0.0, 0.0, 0.0};
  double sumCDw   = 0.0;

  for (int i=0; i < Nmeas10; i++) {

    // Only include measurements where BOTH seems reasonable!
    if ((Ec10[i] > 0.0) && (Ed10[i] > 0.0)) {

      // Sums for averages and widths (unweighted and weighted):
      sumC[0] += 1.0;
      sumC[1] += Lc10[i];
      sumC[2] += sqr(Lc10[i]);
      sumCw[0] += 1.0 / sqr(Ec10[i]);
      sumCw[1] += Lc10[i] / sqr(Ec10[i]);
      sumCw[2] += sqr(Lc10[i]) / sqr(Ec10[i]);

      sumD[0] += 1.0;
      sumD[1] += Ld10[i];
      sumD[2] += sqr(Ld10[i]);
      sumDw[0] += 1.0 / sqr(Ed10[i]);
      sumDw[1] += Ld10[i] / sqr(Ed10[i]);
      sumDw[2] += sqr(Ld10[i]) / sqr(Ed10[i]);

      // Sum for correlation:
      sumCD += Lc10[i] * Ld10[i];
    }
  }

  // Averages, widths, and correlation:
  printf("  Averages, widths, and correlation based on %d measurements. \n", sumC[0]);

  sumC[1] = sumC[1] / sumC[0];
  sumD[1] = sumD[1] / sumD[0];
  sumC[2] = sqrt(sumC[2] / sumC[0] - sqr(sumC[1]));
  sumD[2] = sqrt(sumD[2] / sumD[0] - sqr(sumD[1]));
  printf("  30cm ruler (unweighted):    Average = %7.3f +- %5.3f     Width = %7.3f \n", sumC[1], sumC[2]/sqrt(sumC[0]), sumC[2]);
  printf("   2 m stick (unweighted):    Average = %7.3f +- %5.3f     Width = %7.3f \n", sumD[1], sumD[2]/sqrt(sumD[0]), sumD[2]);

  // WEIGHTED averages:
  sumCw[1] = sumCw[1] / sumCw[0];
  sumDw[1] = sumDw[1] / sumDw[0];
  printf("  30cm ruler (weighted):      Average = %7.3f +- %5.3f \n", sumCw[1], sqrt(1.0/sumCw[0]));
  printf("   2 m stick (weighted):      Average = %7.3f +- %5.3f \n", sumDw[1], sqrt(1.0/sumDw[0]));

  // Note that the correlation can only be taken, if the number of measurements
  // of sample C and sample D are the same, i.e. they have to match in pairs.
  // If a measurement in one sample is for some reason dropped, then for the
  // correlation calculation, the corresponding measurement in the other sample
  // also have to be dropped.
  // In this case, I chose to drop both the C and D sample measurement if
  // one was found "suspicious/unreasonable".
  sumCD = (sumCD/sumD[0] - sumC[1]*sumD[1]) / (sumC[2] * sumD[2]);
  printf("  Correlation between C and D samples: %6.3f \n \n", sumCD);


  // For reference, 2009's weighted average was:
  //  - 30cm ruler: 335.75 +- 0.13 cm.
  //  - 2m   stick: 336.29 +- 0.08 cm.
  // They were incompatible at more than 3 sigma!

  // Fitting the measurements with a Gaussian gave:
  //  - 30cm ruler: 0.52 cm   (2.35 cm in 2010)
  //  - 2m   stick: 0.35 cm   (0.56 cm in 2010)

  // Bonus question: Why then is the 2010 weighted average more precise than that of 2009?
  // Answer: Because a few dedicated people made very accurate measurements, and assigned very small (yet, correct) errors to them!


  // ------------------------------------------------------------------ //
  // Put data into histograms:
  // ------------------------------------------------------------------ //

  // Make histograms:
  TH1F* Hist_Lc = new TH1F("Hist_Lc", "Histogram of Length - 30cm ruler",      20, 325.0, 345.0);
  TH1F* Hist_Ec = new TH1F("Hist_Ec", "Histogram of Errors - 30cm ruler",      20,   0.0,  20.0);
  TH1F* Hist_Ld = new TH1F("Hist_Ld", "Histogram of Length - 2m folding rule", 40, 325.0, 345.0);
  TH1F* Hist_Ed = new TH1F("Hist_Ed", "Histogram of Errors - 2m folding rule", 40,   0.0,  10.0);

  TH2F* Hist_AccVsErr30 = new TH2F("Hist_AccVsErr30", "Correlation between accuracy and error - 30cm", 40, 0.0, 8.0, 40, 0.0, 20.0);
  TH2F* Hist_AccVsErr2m = new TH2F("Hist_AccVsErr2m", "Correlation between accuracy and error - 2m",   40, 0.0, 4.0, 40, 0.0, 10.0);
  TH2F* Hist_AccVsAcc = new TH2F("Hist_AccVsAcc", "Correlation between 30cm and 2m accuracy", 40, 0.0,  8.0, 40, 0.0,  4.0);
  TH2F* Hist_ErrVsErr = new TH2F("Hist_ErrVsErr", "Correlation between 30cm and 2m error",    40, 0.0, 20.0, 40, 0.0, 10.0);


  // Put data into the histograms:
  for (int i=0; i < Nmeas10; i++) {
    if ((Ec10[i] > 0.0) && (Ed10[i] > 0.0)) {
      Hist_Lc->Fill(Lc10[i]);
      Hist_Ec->Fill(Ec10[i]);
      Hist_Ld->Fill(Ld10[i]);
      Hist_Ed->Fill(Ed10[i]);

      Hist_AccVsErr30->Fill(fabs(Lc10[i] - sumDw[1]), Ec10[i]);
      Hist_AccVsErr2m->Fill(fabs(Ld10[i] - sumDw[1]), Ed10[i]);
      Hist_AccVsAcc->Fill(fabs(Lc10[i] - sumDw[1]), fabs(Ld10[i] - sumDw[1]));
      Hist_ErrVsErr->Fill(Ec10[i], Ed10[i]);
    }
  }


  // ------------------------------------------------------------------ //
  // Fit data and make plots:
  // ------------------------------------------------------------------ //

  // Make canvas:
  canvas = new TCanvas("canvas","",250,20,1000,750);               // Make a new window
  canvas->Divide(2,3);                                             // Divide it into a 2x3 window

  canvas->cd(1);
  TF1* gauss_Lc = new TF1("gauss_Lc", "gaus", 325.0, 345.0);
  Hist_Lc->Fit("gauss_Lc","RL");                                    // Option "R" means: Use the function range only!

  canvas->cd(2);
  TF1* gauss_Ld = new TF1("gauss_Ld", "gaus", 325.0, 345.0);
  Hist_Ld->Fit("gauss_Ld","RL");                                    // Option "R" means: Use the function range only!

  // Set marker color and style:
  int color = 2;   // Red
  int style = 20;  // Open circle
  Hist_AccVsErr30->SetMarkerColor(color);
  Hist_AccVsErr30->SetMarkerStyle(style);
  Hist_AccVsErr2m->SetMarkerColor(color);
  Hist_AccVsErr2m->SetMarkerStyle(style);
  Hist_AccVsAcc->SetMarkerColor(color);
  Hist_AccVsAcc->SetMarkerStyle(style);
  Hist_ErrVsErr->SetMarkerColor(color);
  Hist_ErrVsErr->SetMarkerStyle(style);


  canvas->cd(3);
  Hist_AccVsErr30->Draw();
  printf("  Correlation between accuracy and error (30cm): %5.2f \n",
	 Hist_AccVsErr30->GetCorrelationFactor());

  canvas->cd(4);
  Hist_AccVsErr2m->Draw();
  printf("  Correlation between accuracy and error (2m):   %5.2f \n",
	 Hist_AccVsErr2m->GetCorrelationFactor());


  canvas->cd(5);
  Hist_AccVsAcc->Draw();

  canvas->cd(6);
  Hist_ErrVsErr->Draw();


  canvas->Update();
  // canvas->SaveAs("LengthOfTable.eps");


//---------------------------------------------------------------------------------- 
  // Estimates of the table length:
  // (Diana, Rastin, Martin, Heidi, Kristoffer+Tina, Jeppe+Sonja, Song, Sylvia, Bo F):
  const int Nest = 9;
  double meas[Nest] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0};
  double emeas[Nest] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
  double est[Nest]  = {336.22, 336.70, 336.36, 336.366, 336.24, 336.52, 336.18, 336.15, 336.16};
  double eest[Nest] = {  0.28,   0.00,   0.06,   0.003,   0.05,   0.28,   0.00,   0.57,   0.00};

  TGraphErrors* ResEst = new TGraphErrors(Nest, meas, emeas, est, eest);

}

//---------------------------------------------------------------------------------- 
/*

Start by taking a close look at the data, both by inspecting the numbers, and then by
considering the histograms produced by running the macro.


Questions:
----------
 1) Given this data set, which (if any) corrections would you consider making to the
    measurements, and how would you justify them? Would you also make any corrections
    to the associated errors? And are there any data points, which you would exclude?

 2) Given the (possibly) updated dataset, fit the length measurements with a Gaussian
    distribution, both for the 30cm and the 2m ruler cases. Should the binning be changed
    for these fits? And is the Gaussian distribution justified?

 3) Given two succesful fits, the width of these Gaussians actually give an estimate
    of the uncertainty in the measurements. How do they compare to the uncertainties
    estimated by the people making the measurements?
    How much was the estimated uncertainties lowered from first to second type of
    measurement? Was this justified?

 4) Is there any correlation between the measurements? And is there any correlation
    between the errors on the measurements?
    And finally is there any correlation between the accuracy of the measurements
    (i.e. how close they are to the mean) and their associated uncertainty? If this
    is the case, then it somewhat justifies weighting the measurements according to
    their uncertainty (see below).

 5) Try to calculate the mean and then the weighted mean. How much difference does this
    make? Why?


Advanced questions:
-------------------
 1) Is the length of the table different when measured with a 30cm and a 2m
    ruler - quantify this statement! I.e. what is the difference, and what is the
    uncertainty on that difference? Is this significantly away from 0? How big an
    effect on the 30cm ruler would this correspond to (i.e. is it really 30cm)?

 2) In question 2) above you chose a binning - does this have any influence on the
    result?

 3) If you were asked for the best estimate of the length of the table, what would
    you do? (If posssible, read Bevington page 58!)


*/
//----------------------------------------------------------------------------------