%%% Clustering_by_co-initialization: repeating the 1st and 2nd experiments
%%% in He Zhang et al.: Improving Cluster Analysis By Co-initializations.
%%% Pattern Recognition Letters, 2014. Accepted. To appear.

%%% For the 2nd experiment: label_cts (also label_co) contains clustering labels from 3 agglomerative
%%% clustering algorithms (see LinkClue package note for details), and we
%%% utilized the second column of labels as the output, i.e., the result from
%%% Complete-Linkage (CL) hierarchical clustering algorithm.

%%% He Zhang, Oct. 2013

% Silence every warning for the duration of the run.
warning('off', 'all');

% Put the local toolboxes on the search path, one folder at a time and in
% this order, so that path precedence matches successive addpath calls.
localToolboxes = {'1SpectralClustering', 'Ncut_9'};
for t = 1:numel(localToolboxes)
    addpath(localToolboxes{t});
end

% The ensemble toolbox is added together with all of its subfolders.
addpath(genpath('./Cluster Ensemble Toolbox'));

%% the 19 datasets
% Names are kept in a 1-by-19 row cell array; each name is used further
% down to build the data file paths and to label the results.
datasets = { ...
    'orl',       'med',       'vowel',     'coil20',   'semeion', ...
    'faults',    'segment',   'cora',      'citeseer', '7sectors', ...
    'optdigits', 'svmguide1', 'zip',       'usps',     'pendigits', ...
    'protein',   '20news',    'let_rec',   'mnist'};

% number of datasets to process
nd = numel(datasets);

%% testing in each dataset
% Result containers: one row per dataset. The column count of each
% container matches the row slice that is assigned to it in the main loop.

% 1st experiment: three-column containers
[results_km_all, multi_rand_init_all, ncut_all] = deal(cell(nd, 3));

% the ensemble bases from NCUT initialization (overwritten for every dataset)
ncut_bases = cell(1, 3);

% 1st experiment: wider containers
ncut_init_all = cell(nd, 4);
meta_init_all = cell(nd, 5);

% 2nd experiment: {dataset name, purity/entropy result, NMI result}
[results_bc_all, results_cts_all, results_co_all] = deal(cell(nd, 3));

% Make sure the output folder exists before any long-running work starts.
% save() fails on a missing folder, which would otherwise abort the script
% only after the first dataset has been fully processed.
results_dir = fullfile('.', 'results');
if ~exist(results_dir, 'dir')
    mkdir(results_dir);
end
results_file = fullfile(results_dir, 'results_clustering_test');

% Main loop: for every dataset run the 1st experiment (K-means, random
% initializations, NCUT-based and meta initializations) and the 2nd
% experiment (three ensemble clustering methods built on the NCUT bases),
% print both result tables and checkpoint everything to disk.
for ds = 1:nd
    % dataset name
    dname = datasets{ds};
    fprintf('\n*** Num = %d: dataset = %s ***\n', ds, dname);

    % ---- 1st experiment -------------------------------------------------

    % load multivariate data for K-means clustering
    fprintf('\nRunning K-means ...\n\n');
    data = load(fullfile('.', 'datasets_multivariate', dname));
    results_km_all(ds, :) = func_kmeans_19datasets(data, dname);

    % load graph data for random_init, simple_init, heter_init, and
    % heter_co_init; field C holds the reference labels, and the number of
    % clusters k is the number of distinct values in it
    data = load(fullfile('.', 'datasets_graph', dname));
    label = data.C;
    k = numel(unique(label));

    % random_init with 50 different random seeds
    fprintf('\nRunning random_init (50 seeds) ...\n\n');
    multi_rand_init_all(ds, :) = func_multiple_rand_init(data, dname);

    % simple_init, heter_init, and heter_co_init; ncut_bases is kept only
    % for the current dataset and feeds the ensemble methods below
    fprintf('\nRunning simple_init, heter_init, and heter_co_init:\n');
    [ncut_all(ds, :), ncut_bases, ncut_init_all(ds, :), meta_init_all(ds, :)] = ...
        func_meta_init(data, dname);

    % display clustering results of the 1st exp.
    display_clustering_results_exp1(ds, results_km_all(ds, :), ...
        multi_rand_init_all(ds, :), ncut_all(ds, :), ...
        ncut_init_all(ds, :), meta_init_all(ds, :));

    % ---- 2nd experiment -------------------------------------------------

    % the BESTCLUSTERING ensemble clustering
    fprintf('\nRunning BESTCLUTERING ensemble clustering ...\n\n');
    label_bc = func_ensemble_bestclustering(ncut_bases);
    results_bc_all{ds, 1} = dname;
    results_bc_all{ds, 2} = compute_purity_entropy(label, label_bc);
    results_bc_all{ds, 3} = compute_nmi(label, label_bc);

    % the link-based (CTS) ensemble clustering (LinkClue package)
    fprintf('\nRunning link-based (CTS) ensemble clustering ...\n\n');
    dc = 0.8; % the decay factor as suggested in LinkClue package
    label_cts = func_ensemble_linkclue(ncut_bases, k, dc);
    label_cts_cl = label_cts(:, 2); % the Complete-Linkage (CL) clustering
    results_cts_all{ds, 1} = dname;
    results_cts_all{ds, 2} = compute_purity_entropy(label, label_cts_cl);
    results_cts_all{ds, 3} = compute_nmi(label, label_cts_cl);

    % the co-association-matrix-based ensemble clustering; as for CTS, the
    % second column of labels is the one that is evaluated
    fprintf('\nRunning co-association-matrix-based ensemble clustering ...\n\n');
    label_co = func_ensemble_comatrix(ncut_bases, k);
    label_co_cl = label_co(:, 2);
    results_co_all{ds, 1} = dname;
    results_co_all{ds, 2} = compute_purity_entropy(label, label_co_cl);
    results_co_all{ds, 3} = compute_nmi(label, label_co_cl);

    % display clustering results of the 2nd exp.
    display_clustering_results_exp2(ds, results_bc_all(ds, :), ...
        results_co_all(ds, :), results_cts_all(ds, :), meta_init_all(ds, :));

    % save clustering results after every dataset, so that an interrupted
    % run keeps the rows completed so far
    save(results_file, 'results_km_all', ...
        'multi_rand_init_all', 'ncut_all', 'ncut_init_all', 'meta_init_all', ...
        'results_bc_all', 'results_cts_all', 'results_co_all');
end
















