%%  Test Kernel PCA on Various High-D Datasets. 
%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%               Load High-D kPCA Testing Datasets             %%
%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% 1a) Load High-D Breast Cancer Wisconsin Dataset
% Start from a clean state: no workspace variables, no figures, empty console.
clear all; close all; clc;

% Load the Breast-Cancer-Wisconsin dataset from its CSV file.
% NOTE(review): the 'last' argument presumably marks the last CSV column as
% the class label -- confirm against ml_load_data.
[X,labels,class_names] = ml_load_data('breast-cancer-wisconsin.csv','csv','last');

% Transpose data to have columns as datapoints
X = X'; labels = labels';

% N = number of rows of X (dimensions), M = number of columns (samples)
[N,M] = size(X);

% Options for plotting the original data. The class names are hard-coded
% here; the class_names returned by the loader are not used in this cell.
plot_options             = [];
plot_options.is_eig      = false;
plot_options.labels      = labels;
plot_options.class_names = {'Benign','Malignant'};
plot_options.title       = 'Breast-Cancer-Wisconsin Dataset';

% Rows (dimensions) of X to visualize
viz_dim = 1:9;

% No stale-handle guard is needed before plotting: 'clear all' above has
% already removed any previous h1 and 'close all' has closed its figure.
h1 = ml_plot_data(X(viz_dim,:)',plot_options);


%% 1b) Load High-D Ionosphere Dataset
% Start from a clean state: no workspace variables, no figures, empty console.
clear all; close all; clc;

% Load the Ionosphere dataset from its CSV file.
% NOTE(review): the 'last' argument presumably marks the last CSV column as
% the class label -- confirm against ml_load_data.
[X,labels,class_names] = ml_load_data('ionosphere.csv','csv','last');

% Transpose data to have columns as datapoints
X = X'; labels = labels';

% N = number of rows of X (dimensions), M = number of columns (samples)
[N,M] = size(X);

% Options for plotting the original data
plot_options            = [];
plot_options.is_eig     = false;
plot_options.labels     = labels;
plot_options.title      = 'Ionosphere Dataset';

% Rows (dimensions) of X to visualize: every 4th one, from 1 up to 29
viz_dim = 1:4:32;

% No stale-handle guard is needed before plotting: 'clear all' above has
% already removed any previous h1 and 'close all' has closed its figure.
h1 = ml_plot_data(X(viz_dim,:)',plot_options);

%% 1c) Load High-D Hayes-Roth Dataset
% Start from a clean state: no workspace variables, no figures, empty console.
clear all; close all; clc;

% Load the Hayes-Roth dataset from its CSV file.
% NOTE(review): the 'last' argument presumably marks the last CSV column as
% the class label -- confirm against ml_load_data.
[X,labels,class_names] = ml_load_data('hayes-roth.csv','csv','last');

% Transpose data to have columns as datapoints
X = X'; labels = labels';

% N = number of rows of X (dimensions), M = number of columns (samples)
[N,M] = size(X);

% Options for plotting the original data
plot_options            = [];
plot_options.is_eig     = false;
plot_options.labels     = labels;
plot_options.title      = 'Hayes Roth Dataset';

% Visualize every row (dimension) of X
viz_dim = 1:N;

% No stale-handle guard is needed before plotting: 'clear all' above has
% already removed any previous h1 and 'close all' has closed its figure.
h1 = ml_plot_data(X(viz_dim,:)',plot_options);


%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%               2) Apply PCA on Dataset                     %%
%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% 2a) Compute PCA with ML_toolbox
% Run linear PCA on the samples-as-rows data (X'), requesting as many
% dimensions as the data has (N).
options              = [];
options.method_name  = 'PCA';
options.nbDimensions = N;
[pca_X, mappingPCA]  = ml_projection(X',options);

% Pull the pieces of the PCA mapping used by the following cells
V  = mappingPCA.M;       % Eigenvectors
L  = mappingPCA.lambda;  % Eigenvalues
Mu = mappingPCA.mean';   % Mean of Dataset (for reconstruction)

% Option 1: plot the eigenvalues and pick the optimal "p" by inspection
if exist('h2a','var') && isvalid(h2a)
    delete(h2a);
end
h2a = ml_plot_eigenvalues(diag(L));

% Option 2: let the toolbox find the 'p' which yields the desired
% 'Explained Variance'
Var = 0.9;
p   = ml_explained_variance(diag(L), Var);

%% 2b) Compute Mapping Function and Visualize Embedding
% Number of principal directions to keep. This manual choice replaces any
% value of p left in the workspace by a previous cell.
p = 4;

% Projection matrix: the first p columns of V, transposed
A = V(:,1:p)';

% Compute the new embedded points
% y = A*x (linear projection)
% NOTE(review): X is projected as-is, without subtracting the dataset mean
% (Mu); confirm that the uncentered embedding is what is intended here.
y = A*X;

% Plot PCA projections
if exist('h2b','var') && isvalid(h2b), delete(h2b);end
plot_options             = [];
plot_options.is_eig      = false;
plot_options.labels      = labels;
% One axis label per embedded dimension: {'$y_1$', ..., '$y_p$'}, so the
% number of labels always matches the number of rows of y.
plot_options.plot_labels = arrayfun(@(k) sprintf('$y_%d$',k), 1:p, 'UniformOutput', false);
plot_options.title       = 'Projected data with linear PCA';
h2b = ml_plot_data(y',plot_options);

%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%            3)  Apply Kernel PCA on Dataset                 %%
%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% 3a) Compute kernel PCA of Dataset and Check Eigenvalues
% Configure kernel PCA for the ML_toolbox projection routine
options = [];
% Choosing kernel-PCA method
options.method_name  = 'KPCA';
% Number of Eigenvectors to keep.
options.nbDimensions = N;
% Type of Kernel: {'poly', 'gauss'}
options.kernel       = 'gauss';
% Variance for the RBF Kernel
% For 'poly' kpar = [offset degree]
options.kpar         = 0.01;

[kpca_X, mappingkPCA] = ml_projection(X',options);

% Keep the fields of the kPCA mapping that the following cells rely on.
% Note that L holds the square roots of the values in mappingkPCA.L.
V = mappingkPCA.V;
K = mappingkPCA.K;
L = sqrt(mappingkPCA.L);

% Plot EigenValues to try to find the optimal "p"
if exist('h3a','var') && isvalid(h3a)
    delete(h3a);
end
h3a = ml_plot_eigenvalues(diag(L));

%% 3b) Choose p, Compute Mapping Function and Visualize Embedded Points

% Chosen number of eigenvectors to keep
p = 3;
dims = 1:p;

% Square roots of the eigenvalues arranged with diag (L already holds the
% square roots, see cell 3a).
% NOTE(review): assumes L is a vector, so that diag(L) builds a diagonal
% matrix -- confirm against the shape of mappingkPCA.L.
sqrtL = diag(L);

% Compute inverse of square root of eigenvalues matrix L
invsqrtL = diag(1 ./ diag(sqrtL));

% Compute the new embedded points
%y = 1/lambda * sum(alpha)'s * Kernel
% y = sqrtL(1:p,1:p) * V(:,1:p)';
y = invsqrtL(dims,dims) * V(:,dims)' * K;

% Plot result of Kernel PCA
plot_options              = [];
plot_options.is_eig       = false;
plot_options.labels       = labels;
% One axis label per embedded dimension: {'$y_1$', ..., '$y_p$'}, so the
% number of labels always matches the number of rows of y.
plot_options.plot_labels  = arrayfun(@(k) sprintf('$y_%d$',k), 1:p, 'UniformOutput', false);
plot_options.title        = 'Projected data with kernel PCA';
% Remove the figure from a previous run of this cell, if it is still valid
if exist('h3b','var') && isvalid(h3b), delete(h3b);end
h3b = ml_plot_data(y',plot_options);


%% 3c) Plot Isolines of EigenVectors
iso_plot_options = [];
% Dimensions of the original data to consider when computing the Gram
% matrix (since we are doing 2D plots, original data might be of higher
% dimension)
iso_plot_options.xtrain_dim      = [1 2];
% Eigenvectors to use.
iso_plot_options.eigen_idx       = 1:p;
% Plot the training data on top of the isolines
iso_plot_options.b_plot_data     = true;
% Plotted data will be colored according to class label
iso_plot_options.labels          = labels;
% Plot the colorbar.
iso_plot_options.b_plot_colorbar = true;
% Plot the isolines as (3d) surface
iso_plot_options.b_plot_surf     = false;

% Construct Kernel Data from the kPCA results of cell 3a
kernel_data              = [];
kernel_data.alphas       = V;
kernel_data.kernel       = mappingkPCA.kernel;
kernel_data.kpar         = [mappingkPCA.param1,mappingkPCA.param2];
kernel_data.xtrain       = X';
kernel_data.eigen_values = L;

% Remove the isoline figure from a previous run before plotting again
if exist('h_isoline','var') && isvalid(h_isoline)
    delete(h_isoline);
end
[h_isoline,h_eig] = ml_plot_isolines(iso_plot_options,kernel_data);


%% 3d) Grid Search on the Gaussian kernel hyperparameter
grid_options = [];
grid_options.method_name  = 'KPCA';
% Maximum number of Eigenvectors
grid_options.nbDimensions = 10;
% Kernel Type
grid_options.kernel       = 'gauss';

%%% Example RBF Kernel: 1 Row of sigma values %%% for 'gauss' kernel %%%
% kpars = [0.001,0.01, 0.05,0.1,0.2, 0.5, 1, 2, 10, 20, 30, 40, 50];
% Active choice: 10 logarithmically spaced values from 0.5 to 10
kpars = logspace(log10(0.5), log10(10), 10);

%%% Example Polynomial: 1st Row offset, 2nd Row Degree %%% 'poly' kernel %%%
% kpars = [1 1 1 1; 1 2 3 4];   % For 'poly': [offset; order]

% Run the search over kpars and collect the result for plotting
eigenvalues = ml_kernel_grid_search(X',grid_options,kpars);

% Remove the figure from a previous run before plotting the new result
if exist('h_eig2','var') && isvalid(h_eig2)
    delete(h_eig2);
end
h_eig2 = ml_plot_kpca_eigenvalues(eigenvalues,kpars);




