// ----------------------------------------------------------------------------------- //
/*
  ROOT macro for analysing Cavendish's dataset for measuring the density of the Earth.

  In 1798, Henry Cavendish estimated the density of the earth by using a torsion balance.
  The "Cavendish" dataset contains his 29 measurements of the density of the earth,
  presented as a multiple of the density of water.

  To appreciate Cavendish's accuracy it is only necessary to compare his results with
  those of later scientists, to see how slowly his results were improved upon:
    1798 5.45 1.3 % low H. Cavendish
    1838 5.49 0.5 % low F. Reich
    1842 5.67 2.7 % high F. Baily
    1852 5.58 1.1 % high F. Reich
    1883 5.56 0.8 % high A. Cornu and J. Baille
    1895 5.53 0.2 % high C. V. Boys

  Reference:
    Moore, David S., and George P. McCabe (1989). Introduction to the Practice of Statistics.
    Original source: Stigler, S.M., "Do robust estimators work with real data?",
    Annals of Statistics, 5 (1977), pp. 1055-1078.
    Bevington, page 56.
    http://en.wikipedia.org/wiki/Cavendish_experiment
    http://en.wikipedia.org/wiki/Schiehallion_experiment

  Author: Troels C. Petersen (NBI/CERN)
  Email:  Troels.Petersen@cern.ch
  Date:   30th of September 2010
*/
// ----------------------------------------------------------------------------------- //


// ----------------------------------------------------------------------------------- //
void CavendishExp() {
// ----------------------------------------------------------------------------------- //
  gROOT->Reset();

  // Setting of general plotting style:
  gStyle->SetCanvasColor(0);
  gStyle->SetFillColor(0);
  // Setting what to be shown in statistics box:
  gStyle->SetOptStat("emr");
  gStyle->SetOptFit(1111);


  // ------------------------------------------------------------------ //
  // Get data in arrays:
  // ------------------------------------------------------------------ //

  // Open data file:
  FILE *data = fopen("DataSet_CavendishExperiment.txt","r");

  const int Nmax = 29;
  int n = 0;
  double density[Nmax];

  // Loop over and read data as long as there is data (i.e. not End-Of-File (EOF)).
  while (fscanf(data, "%lf \n", &density[n]) != EOF) {
    printf("  Read density: %5.3f \n", density[n]);
    n++;
  }
  printf(" Found %d entries. \n", n);

  fclose(data);


  // ------------------------------------------------------------------ //
  // Analyse data:
  // ------------------------------------------------------------------ //

  // Make histogram:
  TH1F* Hist_Dist = new TH1F("Hist_Dist", "Density", 40, 3.5, 7.5);

  // Put data into the histogram:
  for (int i=0; i < n; i++) {
    Hist_Dist->Fill(density[i]);
  }


  // Make canvas:
  canvas = new TCanvas("canvas","",100,20,600,450);

  TF1* fit_gauss = new TF1("fit_gauss", "gaus", 3.5, 6.5);
  Hist_Dist->Fit("fit_gauss","rl");
  Hist_Dist->Draw();
}


//---------------------------------------------------------------------------------- 
/*

Start by taking a close look at the data, both by inspecting the numbers, and then by
considering the histogram produced by running the macro.


Questions:
----------
 1) Calculate the mean and the width!

 2) Given this data set, which (if any) corrections to or discarding of data would you
    consider making, and how would you justify them?


Advanced questions:
-------------------
 1) Assuming the mean and width calculated in 1), make 10000 MC experiments, each with
    29 Gaussian-distributed numbers, and consider the distribution of ln(likelihood),
    which you calculate yourself for each experiment.
    Where did the likelihood of the original fit to the data lie in comparison?
    What does this tell you about the outlying data point?


*/
//----------------------------------------------------------------------------------