#!/usr/bin/env python

# ----------------------------------------------------------------------------------- #
#
#  Python/ROOT macro for constructing a Fisher disciminant from two 2D Gaussianly
#  distributed correlated variables.
#  The macro creates artificial random data for two different types of processes,
#  and the goal is then to separate these by constructing a Fisher discriminant.
#
#  References:
#    Glen Cowan, Statistical Data Analysis, pages 51-57
#    http://en.wikipedia.org/wiki/Linear_discriminant_analysis
#
#  Author: Troels C. Petersen (NBI)
#  Email:  petersen@nbi.dk
#  Date:   28th of December 2015
#
# ----------------------------------------------------------------------------------- #

# ----------------------------------------------------------------------------------- #
# Imports
# ----------------------------------------------------------------------------------- #
from ROOT import *

# ----------------------------------------------------------------------------------- #
# Functions
# ----------------------------------------------------------------------------------- #
def sqr( a ) : 
    return a*a

#Function for generating a set of correlated random gaussian numbers...
def get_corr( mu1, sig1, mu2, sig2, rho12 ) : 
    r = TRandom3(0) # Just gonna initialize random generator in function for simplicity
                    # Be carefull ALWAYS to use random seed that is changing faster
                    # than you are calling the function!!

    theta = 0.5 * atan( 2.0 * rho12 * sig1 * sig2 / ( sqr(sig1) - sqr(sig2) ) )
    sigu = sqrt( fabs( (sqr(sig1*cos(theta)) - sqr(sig2*sin(theta)) ) / ( sqr(cos(theta)) - sqr(sin(theta))) ) )
    sigv = sqrt( fabs( (sqr(sig2*cos(theta)) - sqr(sig1*sin(theta)) ) / ( sqr(cos(theta)) - sqr(sin(theta))) ) )

    u = r.Gaus( 0.0, sigu )
    v = r.Gaus( 0.0, sigv )

    x = mu1 + cos(theta)*u - sin(theta)*v
    y = mu2 + sin(theta)*u + cos(theta)*v

    return [x,y]


# ----------------------------------------------------------------------------------- #
# Fisher discriminant script.
# ----------------------------------------------------------------------------------- #
gROOT.Reset()

# Setting of general plotting style:
gStyle.SetCanvasColor(0)
gStyle.SetFillColor(0)

# Setting what to be shown in statistics box (using two different methods):
gStyle.SetOptStat("emr")
gStyle.SetOptFit(1111)


# ----------------------------------------------------------------------------------- #
# Define parameters
# ----------------------------------------------------------------------------------- #

nspec   = 2                   # Number of 'species' : signal / background

mean_par0  = [ 15.0 , 12.0  ] # Process type 1,2 mean in x direction
width_par0 = [  2.0 ,  3.0  ] # Process type 1,2 width in x direction
mean_par1  = [ 50.0 , 55.0  ] # ... y
width_par1 = [  6.0 ,  7.0  ] # ... y
corr       = [  0.80,  0.90 ] # Coefficient of correlation

ndata    = 2000         # Amount of data you want to create

color_index  = [ 2,  4]  # Make process type 1 red and two blue...
marker_index = [24, 25]  # Marker style for correlation plot

par_name = ["Parameter_a", "Parameter_b"] #Parameter names
draw_opt = ["", "same"]                   #Drawing option ... 

SavePlots = True


# ----------------------------------------------------------------------------------- #
# Define all your graphics here
# ----------------------------------------------------------------------------------- #

hist_par0 = [ TH1D("hist_par0_spec0", "hist_par0_spec0", 50,  0.0, 25.0) , #Histograms for "par0"
              TH1D("hist_par0_spec1", "hist_par0_spec1", 50,  0.0, 25.0) ] #Projection

hist_par1 = [ TH1D("hist_par1_spec0", "hist_par1_spec0", 50, 20.0, 80.0) , #Histograms for "par1"
              TH1D("hist_par1_spec1", "hist_par1_spec1", 50, 20.0, 80.0) ] #Projection

hist_corr   = [ TH2D("corr_par0_par1_spec0", "corr_par0_par1_spec0", 50, 0.0, 25.0, 50, 20.0, 80.0) , #Correlation 
                TH2D("corr_par0_par1_spec1", "corr_par0_par1_spec1", 50, 0.0, 25.0, 50, 20.0, 80.0) ] #histograms

hist_fisher = [ TH1D("hist_fisher_spec0", "hist_fisher_spec0", 200, -22.0, 3.0) , #Histograms for final
                TH1D("hist_fisher_spec1", "hist_fisher_spec1", 200, -22.0, 3.0) ] #fisher projection

# ----------------------------------------------------------------------------------- #
# Generate data and fill initial histograms
# ----------------------------------------------------------------------------------- #

for ispec in range( nspec ) : 
    for iexp in range( ndata ) : 
        #Get liniarly correlated random numbers...
        values = get_corr( mean_par0[ispec], width_par0[ispec], mean_par1[ispec], width_par1[ispec], corr[ispec] )

        hist_par0[ispec].Fill( values[0] )
        hist_par1[ispec].Fill( values[1] ) 
        hist_corr[ispec].Fill( values[0], values[1] )

# ----------------------------------------------------------------------------------- #
# Specify color etc of your histograms
# ----------------------------------------------------------------------------------- #

for ispec in range( nspec ) : 

    hist_par0[ispec].SetLineColor(   color_index[ispec] )
    hist_par1[ispec].SetLineColor(   color_index[ispec] )
    hist_fisher[ispec].SetLineColor( color_index[ispec] )

    hist_par0[ispec].GetXaxis().SetTitle(   par_name[0] )
    hist_par1[ispec].GetXaxis().SetTitle(   par_name[1] )
    hist_fisher[ispec].GetXaxis().SetTitle( "Fisher Discriminant" )
    hist_fisher[ispec].GetYaxis().SetTitle( "Frequency" )

    hist_corr[ispec].SetMarkerColor( color_index[ispec] ) 
    hist_corr[ispec].SetMarkerStyle(marker_index[ispec] ) 

    hist_corr[ispec].GetXaxis().SetTitle( par_name[0] )
    hist_corr[ispec].GetYaxis().SetTitle( par_name[1] )


# ----------------------------------------------------------------------------------- #
# Plot your generated data
# ----------------------------------------------------------------------------------- #

# x and y projections
canvas_1D = TCanvas("canvas_1D", "canvas_1D", 20, 20, 1000, 600)
canvas_1D.Divide(2)

for ispec in range(nspec) : 
    canvas_1D.cd( 1 ) 
    hist_par0[ispec].Draw( draw_opt[ispec] )
    canvas_1D.cd( 2 ) 
    hist_par1[ispec].Draw( draw_opt[ispec] )

canvas_1D.Update()
if (SavePlots) : canvas_1D.SaveAs("Hist_InputVars.pdf")


# Correlation plot
canvas_2D = TCanvas("canvas_2D", "canvas_2D", 40, 40, 1000, 600)

for ispec in range(nspec) : 
    hist_corr[ispec].Draw( draw_opt[ispec] )

canvas_2D.Update()
if (SavePlots) : canvas_2D.SaveAs("Hist_InputVars2D.pdf")


# ----------------------------------------------------------------------------------- #
# start calculating discriminant here...
# ----------------------------------------------------------------------------------- #


# ----------------------------------------------------------------------------------- #
# Calculate means and widths of individual parameters
# ----------------------------------------------------------------------------------- #

mu_par0  = [0.0, 0.0]
mu_par1  = [0.0, 0.0]

for ispec in range( nspec ) : 
    mu_par0[ispec] = hist_par0[ispec].GetMean()
    mu_par1[ispec] = hist_par1[ispec].GetMean()

# ----------------------------------------------------------------------------------- #
# Now we need to get the sum of the covarance matrices
# ----------------------------------------------------------------------------------- #

covmat_sum = TMatrixD(2,2) # This will need to be generalized to more parameters 
                           # The diagonal terms are the variances of the 1d histograms
                           # While the off diagonal terms are the covariances of the
                           # 2D histogram

covmat_sum[0][0]       = sqr(hist_par0[0].GetRMS())   + sqr(hist_par0[1].GetRMS()) 
covmat_sum[0][1]       = hist_corr[0].GetCovariance() + hist_corr[1].GetCovariance()
covmat_sum[1][0]       = hist_corr[0].GetCovariance() + hist_corr[1].GetCovariance()
covmat_sum[1][1]       = sqr(hist_par1[0].GetRMS())   + sqr(hist_par1[1].GetRMS()) 

covmat_sum.Invert() #Invert matrix


# ----------------------------------------------------------------------------------- #
# Ready to calculate the fisher weights
# ----------------------------------------------------------------------------------- #

#multiply the inverted covairance matrix by the (mu_Null-mu_alt) difference
wf = [covmat_sum[0][0] * (mu_par0[0] - mu_par0[1]) + covmat_sum[0][1] * (mu_par1[0] - mu_par1[1]) , 
      covmat_sum[1][0] * (mu_par0[0] - mu_par0[1]) + covmat_sum[1][1] * (mu_par1[0] - mu_par1[1]) ]

print wf


# ----------------------------------------------------------------------------------- #
# Generate some independent data (but with same specs) as before and apply fisher
# ----------------------------------------------------------------------------------- #

for ispec in range( nspec ) : 
    for iexp in range( ndata ) : 
        values = get_corr( mean_par0[ispec], width_par0[ispec], mean_par1[ispec], width_par1[ispec], corr[ispec] )
        hist_fisher[ispec].Fill( values[0]*wf[0] + values[1]*wf[1] )


# ----------------------------------------------------------------------------------- #
# Finally go draw the resulting distribution
# ----------------------------------------------------------------------------------- #

# x and y projections
canvas_fisher = TCanvas("canvas_fisher", "canvas_fisher", 60, 60, 1000, 600)

for ispec in range(nspec) : 
    hist_fisher[ispec].Draw( draw_opt[ispec] )

canvas_fisher.Update()
if (SavePlots) : canvas_fisher.SaveAs("Hist_FisherOutput.pdf")


raw_input(" ... ")
 
# ----------------------------------------------------------------------------------- #
# Questions
# ----------------------------------------------------------------------------------- #
#
# 
# Looking at the 2D plot, you can by eye see some separation. 
# But how would you quantify this? 
# In the following we will be working towards a Fisher linear discriminant, 
# which does exactly this!
#
#
# As a measure of how good the separation obtained is, we consider the
# "distance" between the two distributions as a measure of goodness:
#   separation = (mu_NULL - mu_ALT)**2 / (sigma_NULL**2 + sigma_ALT**2)
#
# What separation do you get from the two 1D histograms of par0 and par1 using the above formula?
#
#
#
# Looking at the correlation plot. Can you give a guess of what the Fisher
# Weights should turn out to be? (roughly)
# ... or continue, on the questions below, which will lead you to the answer
#
#
# Outcomment the steps that go into the calculating of the discriminant and 
# fill in the blanks
#
# For references on how to do this, look e.g. at todays lectures or the
# wikipedia article that has been linked under week6
#
# 
# What is the separation that you get from the discriminant as compared to the 
# one dimensional histograms?
#
# ---------------------------------------------------------------------------
# Optional below. Don't use more than 10-15 minutes on this
# ---------------------------------------------------------------------------
#
# Try to change the mean, widths and correlations of the generated samples.
# When does the discrimiant seem to perform optimally?
# Can you construct cases where it does not work?
# 
# Change the number of data points to 100 and see what happens.
# Are the weights you calculate constant?
#
# ----------------------------------------------------------------------------------- #