# Note: This script is identical to the PCA version except that PCA has been replaced with KernelPCA.
# Author: Carl-Johannes Johnsen

import numpy as np
from sklearn.decomposition import KernelPCA
import matplotlib.pyplot as plt
from sklearn.preprocessing import quantile_transform

# Load the sample. skiprows=1 drops the first line of the file
# (presumably a column-header row -- confirm against the data file).
Btag1 = np.loadtxt('AlephBtag_MC_small_v2.csv', skiprows=1)

# Work on the first 20000 rows only: every column except the last is used as
# a feature, and the last column is the label compared against 1 below.
X = Btag1[:20000, :-1]
y = Btag1[:20000, -1]

# Pass the features through quantile_transform (default settings) before
# fitting; copy=True leaves the slice of Btag1 untouched.
X = quantile_transform(X, copy=True)

# NOTE(review): no kernel argument is given, so KernelPCA runs with its
# default kernel (linear, per the scikit-learn docs) -- confirm that a
# non-linear kernel was not intended here.
pca = KernelPCA(n_components=2)
pca.fit(X)

# Project the same data that was used for fitting onto the two components.
pca_x = pca.transform(X)

# Split the projected points by label with a boolean mask instead of a
# Python-level loop. Besides being vectorized, the mask keeps the (n, 2)
# shape even when a class has no rows, so the column indexing below cannot
# fail on an empty class.
is_positive = y == 1
truths = pca_x[is_positive]
falses = pca_x[~is_positive]

# Rows with label 1 are drawn in red, all other rows in blue; the low alpha
# keeps dense regions readable.
plt.scatter(truths[:, 0], truths[:, 1], color='red', alpha=.1)
plt.scatter(falses[:, 0], falses[:, 1], color='blue', alpha=.1)
plt.show()