#!/usr/bin/env python
# ----------------------------------------------------------------------------------- #
#
#  Python macro for illustrating error propagation using random Gaussian numbers.
#
#  The example is based on FIRST doing the error propagation analytically,
#  and then verifying it by running a so-called Monte-Carlo (MC) program.
#
#  For more information on error propagation, see:
#    R. J. Barlow:    page 48-61
#    P. R. Bevington: page 36-48
#
#  Author: Troels C. Petersen (NBI)
#  Email:  petersen@nbi.dk
#  Date:   12th of November 2017
#
# ----------------------------------------------------------------------------------- #
# ----------------------------------------------------------------------------------- #
#
#  DO THE FOLLOWING ANALYTICAL EXERCISE FIRST!!!
#
#  1) A class of students estimates by eye that the length of the table in Auditorium A
#     is L = 3.5 +- 0.4 m, and that the width is W = 0.8 +- 0.2 m.
#
#     Assuming that there is no correlation between these two measurements, calculate
#     ANALYTICALLY what the circumference, area, and diagonal length are, including
#     (propagated) uncertainties.
#     Repeat the calculation, given that the correlation is rho(L,W) = 0.5.
#
#  NOTE: Do the above analytical calculation before you continue below!
#
# ----------------------------------------------------------------------------------- #

from __future__ import division, print_function
import numpy as np
import matplotlib.pyplot as plt
from iminuit import Minuit
from probfit import Chi2Regression

plt.close('all')

# Function to create a nice string output for the figure text boxes:
def nice_string_output(names, values, extra_spacing=2):
    max_values = len(max(values, key=len))
    max_names = len(max(names, key=len))
    string = ""
    for name, value in zip(names, values):
        string += "{0:s} {1:>{spacing}} \n".format(name, value,
                                                   spacing=extra_spacing + max_values + max_names - len(name))
    return string[:-2]


#----------------------------------------------------------------------------------
# Error propagation
#----------------------------------------------------------------------------------

# Set parameters:
Nexp = 10000           # Number of experiments
pi = np.pi
SavePlots = False

r = np.random
r.seed(42)

#----------------------------------------------------------------------------------
# Define parameters for two random numbers (Gaussianly distributed):
#----------------------------------------------------------------------------------

mu1   = 3.5
sig1  = 0.4
mu2   = 0.8
sig2  = 0.2
rho12 = 0.0            # Correlation parameter!

if (rho12 < -1.0 or rho12 > 1.0):
    print("ERROR: Correlation factor not in interval [-1,1], as it is {:6.2f}".format(rho12))
    exit()

# Calculate the numbers that allow the transform from the uncorrelated variables u and v
# to the correlated random numbers x1 and x2 below (see Barlow page 42-44 for the method):
theta = 0.5 * np.arctan(2.0 * rho12 * sig1 * sig2 / (np.square(sig1) - np.square(sig2)))
sigu = np.sqrt(np.abs((np.square(sig1*np.cos(theta)) - np.square(sig2*np.sin(theta))) / (np.square(np.cos(theta)) - np.square(np.sin(theta)))))
sigv = np.sqrt(np.abs((np.square(sig2*np.cos(theta)) - np.square(sig1*np.sin(theta))) / (np.square(np.cos(theta)) - np.square(np.sin(theta)))))

# This is nothing more than a (rotation) matrix multiplication written out!
# Note that the absolute value is taken before the square root to avoid sqrt(x) with x < 0.
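
#----------------------------------------------------------------------------------
# OPTIONAL CROSS-CHECK (added for illustration; not part of the original exercise):
# The rotation construction above should be equivalent to drawing directly from a 2D
# Gaussian with the covariance matrix built from sig1, sig2, and rho12. A minimal
# sketch, assuming only numpy, which draws such a sample with a separate generator
# (so the main random sequence is untouched) and prints its empirical correlation:
cov_check = np.array([[sig1**2,         rho12*sig1*sig2],
                      [rho12*sig1*sig2, sig2**2        ]])
r_check = np.random.RandomState(43)
sample_check = r_check.multivariate_normal([mu1, mu2], cov_check, size=1000)
print("Cross-check: empirical correlation of a direct 2D-Gaussian sample: {:5.2f}  (target: {:5.2f})".format(
      np.corrcoef(sample_check[:, 0], sample_check[:, 1])[0, 1], rho12))
#----------------------------------------------------------------------------------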
#----------------------------------------------------------------------------------
# Loop over process:
#----------------------------------------------------------------------------------

x1_all  = np.zeros(Nexp)
x2_all  = np.zeros_like(x1_all)
x12_all = np.zeros((Nexp, 2))
y_all   = np.zeros_like(x1_all)

for i in range(Nexp):

    # Get (uncorrelated) random Gaussian numbers u and v:
    u = r.normal(0.0, sigu)
    v = r.normal(0.0, sigv)

    # Transform into (possibly) correlated random Gaussian numbers x1 and x2:
    x1 = mu1 + np.cos(theta)*u - np.sin(theta)*v
    x2 = mu2 + np.sin(theta)*u + np.cos(theta)*v
    x1_all[i] = x1
    x2_all[i] = x2
    x12_all[i, :] = x1, x2

    # Calculate y (circumference, area or diagonal):
    y = x1 - x2            # Just nonsense formula! Write the appropriate formula yourself!
    y_all[i] = y

    if (i < 5):
        print("Gaussian: x1 = {:4.2f}   x2 = {:4.2f}  ->  y = {:5.2f}".format(x1, x2, y))


#----------------------------------------------------------------------------------
# Plot both input distribution and resulting histogram on screen:
#----------------------------------------------------------------------------------

fig, ax = plt.subplots(figsize=(10, 6))
counts, xedges, yedges, im = ax.hist2d(x12_all[:, 0], x12_all[:, 1], bins=[120, 80],
                                       range=[[0.0, 6.0], [-1.0, 3.0]], cmin=1)
ax.set_aspect('equal')     # NOTE: This forces the x- and y-axis to have the SAME scale!!!
fig.colorbar(im)
ax.set_title('Histogram of lengths (x) and widths (y)')
ax.set_xlabel('x')
ax.set_ylabel('y')

names  = ['Entries', 'Mean x', 'Mean y', 'STD Dev. x', 'STD Dev. y']
values = ["{:d}".format(len(x12_all)),
          "{:.3f}".format(x12_all[:, 0].mean()),
          "{:.3f}".format(x12_all[:, 1].mean()),
          "{:.3f}".format(x12_all[:, 0].std(ddof=1)),
          "{:.3f}".format(x12_all[:, 1].std(ddof=1)),
          ]

ax.plot([0.0, 6.0], [0.0, 0.0], "--k")   # NOTE: This draws a line between x-values [x1, x2] and y-values [y1, y2], dashed ("--") and black ("k")
ax.text(0.05, 0.95, nice_string_output(names, values), family='monospace',
        transform=ax.transAxes, fontsize=10, verticalalignment='top')

plt.tight_layout()
plt.show(block=False)

if (SavePlots):
    fig.savefig("Dist_2Dgauss.pdf")


# Making a new window (canvas):
# --------------------------------------------------------------------
def gaussian(x, N, mu, sigma):
    return N / (sigma*np.sqrt(2*np.pi)) * np.exp(-0.5 * (x-mu)**2 / sigma**2)
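
#----------------------------------------------------------------------------------
# OPTIONAL CROSS-CHECK (added for illustration; not part of the original exercise):
# For the placeholder formula y = x1 - x2 used in the loop above, linear error propagation
# gives sigma_y^2 = (dy/dx1)^2*sig1^2 + (dy/dx2)^2*sig2^2 + 2*rho12*(dy/dx1)*(dy/dx2)*sig1*sig2,
# with dy/dx1 = +1 and dy/dx2 = -1. A minimal sketch comparing this to the MC spread
# (remember to update the derivatives if you change the formula for y):
sigma_y_analytical = np.sqrt(sig1**2 + sig2**2 - 2.0*rho12*sig1*sig2)
print("Analytical propagation for y = x1 - x2: sigma_y = {:5.3f}   MC spread: {:5.3f}".format(
      sigma_y_analytical, y_all.std(ddof=1)))
#----------------------------------------------------------------------------------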
xmin, xmax = 0.0, 5.0

fig2, ax2 = plt.subplots(figsize=(10, 6))
counts, bin_edges, _ = ax2.hist(y_all, 100, range=(xmin, xmax), histtype='step')
bin_centers = (bin_edges[1:] + bin_edges[:-1])/2
s_counts = np.sqrt(counts)            # Poisson uncertainty on the bin counts
# ax2.errorbar(bin_centers[counts>0], counts[counts>0], s_counts[counts>0], fmt='.')

# Chi2 fit of a Gaussian to the (non-empty) bins:
Chi2_object = Chi2Regression(gaussian, bin_centers[counts > 0], counts[counts > 0], s_counts[counts > 0])
minuit = Minuit(Chi2_object, pedantic=False, N=500, mu=3, sigma=1, limit_sigma=(0, 10000), print_level=0)
minuit.migrad()                       # Perform the actual fit

for name in minuit.parameters:
    print("Fit value: {0} = {1:.5f} +/- {2:.5f}".format(name, minuit.values[name], minuit.errors[name]))

Chi2 = minuit.fval
Nvar = 3                              # Number of fit parameters (N, mu, and sigma)
Ndof = len(counts[counts > 0]) - Nvar # Number of degrees of freedom

from scipy import stats
Prob = stats.chi2.sf(Chi2, Ndof)      # The chi2 probability given Ndof degrees of freedom

xaxis = np.linspace(xmin, xmax, 1000)
yaxis = gaussian(xaxis, *minuit.args)
ax2.plot(xaxis, yaxis)

names  = ['Entries', 'Mean', 'STD Dev.', 'Chi2/ndf', 'Prob', 'N', 'mu', 'sigma']
values = ["{:d}".format(len(y_all)),
          "{:.3f}".format(y_all.mean()),
          "{:.3f}".format(y_all.std(ddof=1)),
          "{:.3f} / {:d}".format(Chi2, Ndof),
          "{:.3f}".format(Prob),
          "{:.3f} +/- {:.3f}".format(minuit.values['N'], minuit.errors['N']),
          "{:.3f} +/- {:.3f}".format(minuit.values['mu'], minuit.errors['mu']),
          "{:.3f} +/- {:.3f}".format(minuit.values['sigma'], minuit.errors['sigma']),
          ]

ax2.text(0.65, 0.95, nice_string_output(names, values), family='monospace',
         transform=ax2.transAxes, fontsize=10, verticalalignment='top')

plt.tight_layout()
plt.show(block=False)

if (SavePlots):
    fig2.savefig("Dist_ErrorProp.pdf")

# Finally, ensure that the program does not terminate (and the plots disappear) before you press Enter:
try:
    __IPYTHON__
except NameError:
    try:
        raw_input('Press Enter to exit')   # Python 2
    except NameError:
        input('Press Enter to exit')       # Python 3


#----------------------------------------------------------------------------------
#
# Questions:
# ----------
#  0) First solve the problem of obtaining the circumference, area, and diagonal with
#     uncertainties ANALYTICALLY.
#
#  1) Now look at the program, and assure yourself that you understand what is going on.
#     Put in the correct expression for y in terms of x1=L and x2=W in order to calculate the
#     circumference, area, and diagonal length, and run the program.
#     Does the output correspond well to the results you expected from your calculations to begin with?
#
#  2) Imagine that you wanted to know the central value and uncertainty of y1 and y2, given the
#     same PDFs for x1=L and x2=W as above:
#       y1 = log(sqr(x1*tan(x2)) + sqrt((x1-x2)/(cos(x2) + 1.0 + x1)))
#       y2 = (1.1 + sin(20*x1)) * fabs(1.0/x2)
#     Get the central value of y, and see if you can quickly differentiate this with
#     respect to x1 and x2, and thus predict what uncertainty to expect for y using
#     the error propagation formula. It is (for once) OK to give up :-)
#     Next, try to estimate the central value and uncertainty using random numbers
#     like above - do you trust this result more? And are the distributions Gaussian?
#
#
# Advanced questions:
# -------------------
#  1) Try to generate x1 and x2 with a quadratic correlation, and see that despite
#     not having any linear correlation, the result for the circumference, area, and diagonal
#     length is still affected (a small sketch is given below).
#
#----------------------------------------------------------------------------------
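
#----------------------------------------------------------------------------------
# SKETCH for advanced question 1 (added for illustration; not part of the original macro).
# One simple way to obtain a purely quadratic (i.e. non-linear) dependence is to let x2
# depend on (x1 - mu1)**2 plus independent noise; the linear correlation coefficient of
# such a sample is close to zero, yet y(x1, x2) is still affected. Uncomment to try it;
# note that the 0.5 scale factor below is an arbitrary choice:
#
#   x1_quad = r.normal(mu1, sig1, Nexp)
#   x2_quad = mu2 + 0.5*(x1_quad - mu1)**2 + r.normal(0.0, sig2, Nexp)
#   print("Linear correlation of the quadratically related sample: {:5.2f}".format(
#         np.corrcoef(x1_quad, x2_quad)[0, 1]))
#----------------------------------------------------------------------------------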