#!/usr/bin/env python

# Above statement tells your terminal that the script should be run in 
# a python enviroment.

# --------------------------------------------------------- #
#  Script calculating all prime numbers up to 10000
#
#  Author:  Troels C. Petersen (Niels Bohr Institute)
#  Date:    06-10-2015 (originally)
# --------------------------------------------------------- #

from __future__ import print_function, division
import numpy as np               # Import the numpy library
import matplotlib.pyplot as plt
from iminuit import Minuit
from probfit import Chi2Regression #, BinnedChi2, UnbinnedLH, BinnedLH

SavePlots = False

Nmax = 10000              # Maximum prime number
N    = 2                  # Starting value


# =============================================================================
# Initial functions:
# =============================================================================

# Function to create a nice string output:
def nice_string_output(names, values, extra_spacing = 2):
    max_values = len(max(values, key=len))
    max_names = len(max(names, key=len))
    string = ""
    for name, value in zip(names, values):
        string += "{0:s} {1:>{spacing}} \n".format(name, value,
                   spacing = extra_spacing + max_values + max_names - len(name))
    return string[:-2]


# =============================================================================
# Main program:
# =============================================================================

# Calculating prime numbers comes down to taking a number (here N),
# and testing if it is prime by dividing it by all numbers up the
# the square root of it. If no number divides N (tested using the 
# "%" operator, which returns the remainder: 6%3 = 0, 5%2 = 1, etc.),
# then N is prime!

# See if you can follow this in python program form below. There 
# are only 9 lines in total! The central part consists of a While-loop,
# which loops as long as the statement is true.
# It is true to begin with (N=2, Nmax = 10000), but at some point
# N will have increased enough to equal Nmax, and the looping stops.

# Create empty list for primes. We dont create a numpy list since we
# don't know the size of it before creation:
primes = []

while (N < Nmax):
    # print "Potential Prime number I want to test: ", N
    hit = False                # Define a variable "hit", with starting value "False".

    for i in range (2,int(np.sqrt(N))+1) :   # Loop over the possible divisors.
        # print "  I'm testing the first number with the second and get the third: ", N, i, N%i

        if (N%i == 0):         # If the remainder of the integer division is 0
            hit = True         # hit = "True" (i.e. the number being tested had a divisor, and is not a prime)
            break              # Skip the rest of the possible divisors.

    if (not hit) :             # If no number gave a perfect integer division (i.e. with remainder 0)...
        primes.append(N)
        print("I got a prime", N)   # ...the it is a prime number. Fill it into the histogram.

    N += 1                     # Increase N by one.

# All primes up to Nmax as a numpy array
primes = np.array(primes)
print()
print("First 10 primes, ", primes[:10]) # prints the first 10 primes

# Check that there are no even primes except 2:
print("  The even prime numbers are listed here: ", primes[(primes % 2) == 0])

# This algorithm gives the prime numbers up to Nmax = 10000!
# Think it through, and see if you understand it...


# --------------------------------------------------------- #
# Draw output:
# --------------------------------------------------------- #

Nbins = 100
xmin = 0
xmax = Nmax

# Create just a single figure and axes
fig, ax = plt.subplots(figsize=(12, 6))

# The plot of the histograms, we want as a graph with error bars and not a regular histogram.
# Therefore we don't do ax.hist(primes, bins=Nbins, range=(xmin, xmax), histtype='step', lw=2, ),
# but rather:
y, bin_edges = np.histogram(primes, bins=Nbins, range=(xmin, xmax), normed=False)
x = 0.5*(bin_edges[1:] + bin_edges[:-1])       # Calculate the x-values as the center of the bins.
sy = np.sqrt(y)                                # Assume Poissonian errors (more on that later!)
ax.errorbar(x, y, sy, fmt='.', label='Prime number distribution')

ax.set_xlim(xmin, xmax)
ax.set_xlabel("Prime Number")
ax.set_ylabel("Frequency")
ax.set_title("Prime number density distribution")


# The Prime Number Theorem (http://en.wikipedia.org/wiki/Prime_number_theorem) states,
# that the number of prime numbers follows roughly the function f(x) = 1/ln(x).

# We have binned our prime numbers (see histogram definition at the top), so we
# must have a function with a few more degrees of freedom: f(x) = c0 + c1/ln(x),
# where c0 and c1 are constants that needs to be fitted.

def fit_prime(x, c0, c1) :
    return c0 + c1 / np.log(x)


# Make a chi2-regression object based on the fit function fit_prime and the x, y and sy values. 
chi2_object = Chi2Regression(fit_prime, x, y, sy)
minuit = Minuit(chi2_object, pedantic=False, c0=0, c1=10)   # Initializes the fit with given initial values
minuit.migrad()                         # Fit the function to the data
c0, c1 = minuit.args                    # Get the output arguments
for name in minuit.parameters:          # Print the output (parameters of the fit)
    print("Fit value: {0} = {1:.5f} +/- {2:.5f}".format(name, minuit.values[name], minuit.errors[name]))

x_fit = np.linspace(xmin, xmax, 1000)   # Create the x-axis for the plot of the fitted function
y_fit = fit_prime(x_fit[1:], c0, c1)    # Not from x[0] since x[0] is 0, which is not defined for the log!
ax.plot(x_fit[1:], y_fit, '-', label='Prime number fit result')
ax.legend(loc='best')

ax.text(0.5, 0.6, r'Fit function: f(x) = $C_0 + \frac{C_1}{\log (x)}$', \
        transform = ax.transAxes, fontdict={'size': 18}) # transform so relative coordinates


# Here we calculate the chi2 value of the fit and the number of non-empty bins (for Ndof):
chi2_val = 0
N_NotEmptyBin = 0
for values in zip(x, y, sy) :
    if values[2] > 0:
        f = fit_prime( values[0], c0, c1)          # Calc the model value
        residual  = ( values[1]-f ) / values[2]    # Find the uncertainty-weighted residual
        chi2_val += residual**2                    # The chi2-value is the squared residual
        N_NotEmptyBin += 1                         # Count the bin as non-empty since sy>0 (and thus y>0)

Ndof = N_NotEmptyBin - len(minuit.args)            # Number of degrees-of-freedom

from scipy import stats
chi2_prob =  stats.chi2.sf(chi2_val, Ndof)

# Put result of the fit in plot:
names = ['Entries', 'Chi2/ndf', 'Prob', 'C0', 'C1']
values = [  "{:d}".format(N_NotEmptyBin),
            "{:.3f} / {:d}".format(chi2_val, Ndof), 
            "{:.3f}".format(chi2_prob),
            "{:.3f} +/- {:.3f}".format(minuit.values['c0'], minuit.errors['c0']),
            "{:.3f} +/- {:.3f}".format(minuit.values['c1'], minuit.errors['c1']),
          ]

# Place a text box in upper left in axes coords
ax.text(0.02, 0.95, nice_string_output(names, values), family='monospace', transform=ax.transAxes, fontsize=12, verticalalignment='top')

plt.tight_layout()
plt.show(block=False)

if SavePlots:
    fig.savefig('PrimeNumberDistribution.pdf', dpi=600)

# Finally, ensure that the program does not termine (and the plot disappears), before you press enter if not iPython:
try: 
    __IPYTHON__
except: 
    raw_input('Press Enter to exit')