#!/usr/bin/env python

# ----------------------------------------------------------------------------------- #
#
#  Python macro for illustrating the Binomial, Poisson and Gaussian distributions,
#  and how they transform into each other (well, into the Gaussian).
#
#  Reference:
#    Barlow, chapter 3
#
#  Run this Python macro by:
#    > ./BinomialPoissonGaussian.py
#
#  Author: Troels C. Petersen (NBI)
#  Email:  petersen@nbi.dk
#  Date:   19th of November 2017
#
# ----------------------------------------------------------------------------------- #

from __future__ import division, print_function
import numpy as np
import matplotlib.pyplot as plt
from iminuit import Minuit
from probfit import Chi2Regression
from scipy.stats import binom, poisson, norm
from scipy import stats
plt.close('all')

# Set parameters:
r = np.random                       # Random generator
r.seed(42)                          # Fixed order of random numbers

SavePlots = False
verbose = True
Nverbose = 10

Nexperiments = 1000


#----------------------------------------------------------------------------------
# Initial figures:
#----------------------------------------------------------------------------------

def func_Binomial_pmf(x, n, p):
    return binom.pmf(x, n, p)

def func_Gaussian_pdf(x, mu, sigma) :
    return norm.pdf(x, mu, sigma)


n, p = 10, 0.2

xaxis = np.linspace(-0.5, 10.5, 1000)
yaxis_binom = func_Binomial_pmf(np.floor(xaxis+0.5), n, p) # note the floor command here
yaxis_gauss = func_Gaussian_pdf(xaxis, n*p, np.sqrt(n*p*(1-p)))

fig0, ax0 = plt.subplots(figsize=(10, 6))
ax0.plot(xaxis, yaxis_binom, '-', label='Binomial with n={:2d} and p={:.3f}'.format(n, p))
ax0.plot(xaxis, yaxis_gauss, '-', label='Gaussian with mu={:.2f} and sigma={:.3f}'.format(n*p, np.sqrt(n*p*(1-p))))
ax0.set_xlim(-0.5, 10.5)
ax0.set_title('Probability for Binomial and Gaussian')
ax0.set_xlabel('x')
ax0.set_ylabel('Probability')
plt.legend(loc='upper right')


#----------------------------------------------------------------------------------
# Loop over process:
#----------------------------------------------------------------------------------


# Define process, i.e. number of trials and probability of success:
Ntrials = 2000
pSuccess = 1.0/60.0
Lambda = Ntrials * pSuccess     # This is the mean and also how the Poisson parameter is defined!


all_nSuccess = np.zeros(Nexperiments)
# Run the experiments, and fill the histogram from above:
for iexp in range( Nexperiments ) : 
    
    # Simulating process defined:
    nSuccess = 0
    for i in range( Ntrials ) : 
        x = r.uniform()
        if (x < pSuccess): 
            nSuccess = nSuccess + 1

    # Record result:
    if (verbose and iexp < Nverbose): 
        print("  nSuccess: {:4d}".format(nSuccess))
        
    # Save Result
    all_nSuccess[iexp] = nSuccess


#---------------------------------------------------------------------------------- 
# Plot result:
#---------------------------------------------------------------------------------- 


# Define a histogram with the "data" (Note: think about the binning!):
counts, bin_edges = np.histogram(all_nSuccess, bins=Ntrials+1, range=(-0.5, Ntrials+0.5))
bin_centers = (bin_edges[1:] + bin_edges[:-1])/2
s_counts = np.sqrt(counts)

x = bin_centers[counts>0]
y = counts[counts>0]
sy = s_counts[counts>0]

fig, ax = plt.subplots()
ax.errorbar(x, y, yerr=sy, xerr=0.5, label='Distribution of nSucces',
            fmt='.k',  ecolor='k', elinewidth=1, capsize=1, capthick=1)
ax.set_xlim(-0.5, 60)
ax.set_xlabel('Number of successes')
ax.set_ylabel('Frequency')


# Draw Binomial:
# --------------
def func_Binomial(x, N, n, p):
    return N * binom.pmf(x, n, p)

Chi2_Bin = Chi2Regression(func_Binomial, x, y, sy)
minuit_Bin = Minuit(Chi2_Bin, pedantic=False, N=Nexperiments, n=Ntrials, p=pSuccess) #   
minuit_Bin.migrad()  # perform the actual fit

xaxis = np.linspace(-0.5, 60, 1000)
yaxis = func_Binomial(np.floor(xaxis+0.5), *minuit_Bin.args)
ax.plot(xaxis, yaxis, '-', label='Binomial distribution')

# Draw Poisson:
# -------------
def func_Poisson(x, N, mu) :
    return N * poisson.pmf(x, mu)

Chi2_Poisson = Chi2Regression(func_Poisson, x, y, sy)
minuit_Poisson = Minuit(Chi2_Poisson, pedantic=False, N=Nexperiments, mu=Lambda) #   
minuit_Poisson.migrad()  # perform the actual fit

# Note that the np.floor function takes the integer/rounded (towards zero) value of numbers.
yaxis = func_Poisson(np.floor(xaxis+0.5), *minuit_Poisson.args)
ax.plot(xaxis, yaxis, '-', label='Poisson distribution')

# Draw Gaussian:
# -------------
def func_Gaussian(x, N, mu, sigma) :
    return N * norm.pdf(x, mu, sigma)

Chi2_Gaussian = Chi2Regression(func_Gaussian, x, y, sy)
minuit_Gaussian = Minuit(Chi2_Gaussian, pedantic=False, N=Nexperiments, mu=Lambda, sigma=np.sqrt(Lambda)) #   
minuit_Gaussian.migrad()  # perform the actual fit

yaxis = func_Gaussian(xaxis, *minuit_Gaussian.args)
ax.plot(xaxis, yaxis, '-', label='Gaussian distribution')

plt.legend()

plt.tight_layout() 
plt.show(block=False)

if (SavePlots): 
    fig.savefig("BinomialPoissonGaussian.pdf")


#---------------------------------------------------------------------------------- 
# Your calculation of ChiSquare values!
# I suggest you use Pearson's Chi2, and require Nobs and/or Nexp > e.g. 0.1
# Remember with your choice to ensure, that there is no division by zero.
#---------------------------------------------------------------------------------- 

Nbins = len(x)                # Just to know how many bins to loop over
chi2_bino = 0.0
Ndof = 0

for ibin in range (Nbins) :
    Nobs = y[ibin]    
    if (Nobs > 0) :
        Nexp_bino = func_Binomial(x[ibin], *minuit_Bin.args)

print("  Binomial:   chi2 = {:5.2f}   Ndof = {:2d}   Prob = {:6.4f}   (just a test printout!!!) ".format(0.0, 0, 0.0))


# Finally, ensure that the program does not termine (and the plot disappears), before you press enter:
try:
    __IPYTHON__
except:
    raw_input('Press Enter to exit')

#---------------------------------------------------------------------------------- 
# 
# Make sure you understand what process yields a Binomial, a Poisson and a Gaussian
# distribution.
# 
# The program repeats an experiment, where one performs N trials, each with p chance
# of success, and then considers the distribution of successes.
# 
# 
# Questions:
# ----------
#  1) Plot a Binomial (N=27, p=1/3), Poisson (lambda = 9), and Gaussian (mu=9, sigma=3).
#     Which distribution has the longest tail towards high values? And which one has
#     the longest tail the other way? Possibly use a logarithmic plot.
# 
#  2) Using Nexperiments=1000, Ntrials=10 and pSuccess=1/6, calculate "by hand" (i.e.
#     using a loop) the ChiSquare between the data and each of the three distributions.
#     Since you are not fitting anything, what is the number of degrees of freedom?
#     Do they give a reasonable Chi2 probability?
#     Retry with Ntrials=100 and pSuccess=1/60. Does the degree of fit improve for any
#     of the three?
#     Retry again with Ntrials=1000 and pSuccess=1/60. Again, does the quality of the fit
#     improve further for any of the three?
# 
#  3) In the above case with Ntriels=100 and pSuccess=1/60, calculate the mean and the
#     width of the distribution. Do they have the relation, that you would expect from
#     a Poisson distribution?
#     Try it for other values where the Poisson describes the data well, and check if
#     the Poisson relation between mean and width holds.
# 
#  4) Using Nexperiments=1000, Ntrials=1000 and pSuccess=1/60, is the skewness consistent
#     with zero (as the Gaussian should have)? Does that contradict the conclusion from
#     above?
# 
#----------------------------------------------------------------------------------