#!/usr/bin/env python

# ----------------------------------------------------------------------------------- 
#
#  Python/ROOT macro for illustration of the Central Limit Theorem (CLT).
#
#  Random numbers from different distributions (but with RMS = 1) are added to a sum,
#  and many such sums are plottet. As it turns out (and as dictated by the CLT),
#  their distribution turns about to be Gaussian.
#  The example also illutrates how widths (and therefore uncertainties) are added in
#  quadrature, as one has to divide the sum by the square root of the number of random
#  numbers that went into the sum in order to get a Gaussian of unit width (when using
#  random numbers of unit width, i.e. RMS = 1).
#
#  For more information on the Central Limit Theorem, see:
#    R. Barlow: page 49
#    G. Cowan: page 33
#    http://en.wikipedia.org/wiki/Central_limit_theorem
#
#  Run this macro by:
#    From prompt: > ./CentralLimit.py
#  
#  Result in file results.root, displayed by:
#    In prompt: > root -l results.root
#    Followed by (in ROOT): TBrowser a (use mouse from there)
#
#  Author: Troels C. Petersen (NBI)
#  Email:  petersen@nbi.dk
#  Date:   11th of November 2016
#
#  ----------------------------------------------------------------------------------- //

from ROOT import *
import math

gROOT.Reset()

# Set the showing of statistics and fitting results (0 means off, 1111 means all on):
gStyle.SetOptStat(1111)
gStyle.SetOptFit(1111)

r = TRandom3()                        # Random number generator
Nexperiments = 1000                   # Number of sums produced
Nuniform     = 10                     # Number of uniform numbers used in sum
Nexponential =  0                     # Number of exponential numbers used in sum
Ncauchy      =  0                     # Number of cauchy numbers used in sum

pi = 3.14159265358979323846264338328  # Selfexplanatory!!!
pi = TMath.Pi()                       # Another way of doing it - probably better!

verbose = True                        # Print some numbers or not?
Nverbose = 10                         # If so, how many?

SavePlots = False                     # Save the plots produced to file(s)?


#----------------------------------------------------------------------------------
# Make histograms (name, title, number of bins, minimum, maximum):

Hist_Sum         = TH1F("Hist_Sum"        , "Hist_Sum"        , 240, -6.0, 6.0)
Hist_Uniform     = TH1F("Hist_Uniform"    , "Hist_Uniform"    , 240, -6.0, 6.0)
Hist_Exponential = TH1F("Hist_Exponential", "Hist_Exponential", 240, -6.0, 6.0)
Hist_Cauchy      = TH1F("Hist_Cauchy"     , "Hist_Cauchy"     , 240, -6.0, 6.0)


#----------------------------------------------------------------------------------
# Loop over process:
#----------------------------------------------------------------------------------

N3sigma = 0        # Counter for the number of produced sums, that fall outside +-3 sigma

for iexp in range( Nexperiments ) : 

    if (iexp % 100 == 0) : print "At iexp : ", iexp       # Show progress!
    sum = 0.0                                             # sum is the number we are going to add random numbers to!
                                                          # According to the CLT, it should be Gaussianly distributed.
 
    # Generating uniform numbers (with mean 0, and RMS of 1):
    for i in range( Nuniform ) : 
        x = math.sqrt(12.0) * (r.Uniform() - 0.5)         # Uniform between +-sqrt(3). Why?
        sum += x
        Hist_Uniform.Fill(x)
        if verbose and iexp == 0 and i < Nverbose : print "   Uniform:     %7.4f"%x

    # Generating exponential numbers (with mean 0, and RMS of 1):
    for i in range( Nexponential ) : 
        x = -math.log( r.Uniform() ) - 1.0                # Exponential starting at -1. Why?
        # x = r.Exp() - 1.0                               # Alternative way to produce x exponentially.
        sum += x
        Hist_Exponential.Fill(x)
        if verbose and iexp == 0 and i < Nverbose : print "   Exponential: %7.4f"%x

    # Generating numbers according to a Cauchy distribution (1 / (1 + x^2)):
    for i in range( Ncauchy ) : 
        x = math.tan(pi * (r.Uniform() - 0.5))            # Cauchy with mean 0
        # x = r.Cauchy()                                  # Alternative way to produce x according to Cauchy PDF.
        sum += x
        Hist_Cauchy.Fill(x)
        if verbose and iexp == 0 and i < Nverbose : print "   Cauchy       : %7.4f"%x

    Ntotal = Nuniform + Nexponential + Ncauchy
    sum = sum / math.sqrt( Ntotal )     # Ask yourself, why I divide by sqrt(N)?
    Hist_Sum.Fill(sum)
    if not (-3.0 < sum < 3.0) :
        N3sigma += 1

# Write how many of the experiments had a result outside the range [-3,3], i.e. beyond +-3 sigma:
print "  N experiments beyond 3 sigma / total: %4d / %d    (%6.4f) \n"%(N3sigma, Nexperiments, float(N3sigma)/float(Nexperiments))


#---------------------------------------------------------------------------------- 
# Draw the input distributions:
#---------------------------------------------------------------------------------- 

# Define a "canvas", i.e. a new window on the screen. This is again a ROOT thing
# (hence the "T" in TCanvas), and you give it name, title, x-position, y-position, x-size and y-size:
canvas1 = TCanvas( "canvas1", "Distribution of Cauchy numbers", 20, 20, 1200, 500)

# Next, divide the canvas into 3x1 "pads":
canvas1.Divide(3,1)

# Now go to each of the pads and draw the input histogram there:
canvas1.cd(1); Hist_Uniform.Draw()
canvas1.cd(2); Hist_Exponential.Draw()
canvas1.cd(3); Hist_Cauchy.Draw()

# Update the plot and possibly save.
canvas1.Update()
if (SavePlots) : canvas2.SaveAs("Plot_CentralLimit_Input.pdf")      # Save plot (format follow extension name)


#---------------------------------------------------------------------------------- 
# Draw output plots with corresponding fits to the screen:
#---------------------------------------------------------------------------------- 

canvas2 = TCanvas( "canvas2", "Distribution of sums", 50, 50, 1200, 600)

Hist_Sum.GetXaxis().SetRangeUser(-6.0, 6.0)       # From the histogram, we get the x-axis, and set the range.
Hist_Sum.SetMinimum(0)                            # We also set the minimum to be zero (not to cheat the eye).
Hist_Sum.GetXaxis().SetTitle("Sum")               # We give the x-axis a label.
Hist_Sum.GetYaxis().SetTitle("Frequency")         # We give the y-axis a label.

Hist_Sum.SetMarkerColor(kBlue)                    # Set the histogram marker color to blue.
Hist_Sum.SetMarkerStyle(20)                       # Set the histogram marker style to filled circles.
# How could you know that 20 is filled circles? Well, ROOT is well documented and quite used,
# so if you write "setmarkerstyle" in Google, the first hit is: http://root.cern.ch/root/html/TAttMarker.html
# Here, you can find all the options, details, etc.

# We define a function in ROOT as shown below. "T" from ROOT and "F1" is a function in 1D.
# Again it has a name (chosen to be the same as the function name), and then one has to
# specify the function expression, here a predefined ROOT function "gaus" (guess what that is!).
# Finally, the function range is set:
fit_Sum = TF1("fit_Sum", "gaus", -5.0, 5.0)
fit_Sum.SetParameters(float(Nexperiments), 0.0, 1.0)   # Set the starting values of the fit parameters.
fit_Sum.SetLineColor(kRed)               # Set the line color to red.
fit_Sum.SetLineWidth(3)                  # Set the line width to 3.

# So now we ask that the histogram is fitted by the fitting function.
# The options "L" is for Likelihood fit (ChiSquare is defalt),
# and "R" is asking the fit to use the function range (whole range is default).
Hist_Sum.Fit("fit_Sum", "LR")

# Draw the histogram with the option "e" for showing the errors in the plot.
Hist_Sum.Draw("e")

# Define a unit Gaussian to draw what a "perfect" Gaussian result would look like.
# Note 1: This function does not have any parameters in it, and is thus just a function!
# Note 2: Why is there a "50.0" in the normalisation? Hint: Width of the bins in the histogram!
func_UnitGauss = TF1("func_UnitGauss", "50.0/sqrt(2*3.1416) * exp(-0.5*x*x)", -5.0, 5.0)
func_UnitGauss.SetLineColor(kBlue)
func_UnitGauss.SetLineWidth(3)
func_UnitGauss.Draw("same")     # "same" means on top of what is already plotted!

canvas2.Update()
if (SavePlots) : canvas2.SaveAs("Plot_CentralLimit_Output.pdf")      # Save plot (format follow extension name)


#---------------------------------------------------------------------------------- 
# Write histograms to file:
#---------------------------------------------------------------------------------- 
outfile = TFile("results.root", "RECREATE", "Histograms")

Hist_Sum.Write()
Hist_Uniform.Write()
Hist_Exponential.Write()
Hist_Cauchy.Write()

outfile.Write()
outfile.Close()


# Finally, ensure that the program does not termine (and the plot disappears), before you press enter:
raw_input('Press Enter to exit')


# ---------------------------------------------------------------------------------- 
# 
# First make sure that you understand what the Central Limit Theorem (CLT) states!
# Then, acquaint yourself with the program. Make sure that you read through it, as many
# of these features will be used onwards. Do you understand why the uniform distribution
# needs to go from +-sqrt(3) in order to give a distribution with a width of one (i.e. unit)
# and why you subtract one from the exponential distribution (and how this works at all)?
# 
# Then try to see what the result of adding 10 uniform random numbers is? Which
# distribution does it give (surprice!!!), and how well does it resemble it?
# 
# 
# Questions:
# ----------
#  1) What is the mean and RMS of the input distributions?
#
#  2) Why is there a "/sqrt(N)" in line 102 (when summing up the various contributions to sum)?
#     Hint: Assume that I always wanted to compare the distribution of sums with a UNIT Gaussian.
#
#  3) Using a sum of 10 uniform random numbers with mean 0 and width 1, what is the expected
#     width of the resulting distribution according to CLT? What is the probability of
#     obtaining a number beyond 3 sigma, i.e. how many numbers did you get beyond 3 sigma?
#     What would you expect from a true Gaussian distribution?
#     And what about the same question for 3.5 sigma?
#     Perhaps increase the number of experiments run to (much) more than 1000...
# 
#  4) Now try to add 10 exponential. Does that give something Gaussian?
#     Then try to add 10 cauchy numbers. Does that give something Gaussian?
#     If not Gaussian, why do the Cauchy distribution "ruin" the Gaussian distribution?
#     And is this in conflict with the Central Limit Theorem?
# 
# 
# Advanced questions:
# -------------------
#  1) If one used a trunkated mean of 10 Cauchy numbers (throwing away the top and bottom e.g. 10%),
#     will the truncated mean of 1000 Cauchy numbers then converge to a Gaussian?
# 
#  2) Try to combine 10 exponential numbers into sums 1000 times, and do a ChiSquare fit
#     with a Gaussian to the resulting distribution. What is the Chi2 and Ndof of this?
#     Is that a good fit? (We will discuss this much more later in the course).
# 
#  3) How few/many uniform random numbers needs to be added, before the probability
#     for the sum to follow a Gaussian distribution is greater than 1% (on average)
#     when using 1000 sums (i.e. Nexperiments = 1000)?
#
# ----------------------------------------------------------------------------------