% Script for analyzing window size results in random data

% Copyright (c) 2015, Jefrey Lijffijt
%
% Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
%
% The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
%
% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

%% Clear workspace
clear;

%% Experiment 1: solution stability for Clara parameters
% Runs clara on one data set (p = 0.1) for every combination of k, nrep
% and samplesize, and kmedoidspp for every k, repeated reps times.
% NOTE(review): the original header stated n = 1999 while the code uses
% n = 1099 (Experiment 2 uses 1999) -- confirm which value is intended.

reps = 100;
n = 1099;
p = 0.1;
windowsizes = 1:1000;
k = 1:4;
nrep = [10 20 40 80];
samplesize = [40 80 160 320];
plusplus = true;

% Loop bounds follow the parameter vectors, so a grid can be changed in one
% place. They are stored in plain variables before the parfor because the
% range of a for-loop nested in a parfor must be given by constants or
% broadcast variables when its index is used to index a sliced output.
numk = numel(k);
numnrep = numel(nrep);
numsamplesize = numel(samplesize);

err = zeros(reps,numk,numnrep,numsamplesize);
ws = cell(reps,numk,numnrep,numsamplesize);
errpam = zeros(reps,numk);
wspam = cell(reps,numk);

% Generate random data
X = simulateFixedrate(n, p);

% Compute trace (frequency) matrix once; it is shared by all repetitions
tfreq = tic;
Trace = computeFrequencyMatrix(X, 1, windowsizes);
fprintf('%s\n',['Frequency matrix computed in ' num2str(toc(tfreq)) ' seconds.']);

% Compute clusterings
for g = 1:reps
    parfor h = 1:numk
        % Explicit timer handle: each worker times its own iteration
        tclust = tic;
        for i = 1:numnrep
            for j = 1:numsamplesize
                [ws{g,h,i,j},err(g,h,i,j)] = clara(Trace,k(h),nrep(i),samplesize(j),plusplus); %#ok<PFBNS>
            end
        end
        [wspam{g,h}, errpam(g,h)] = kmedoidspp(Trace,k(h),plusplus);
        % Reported time covers all clara runs plus the kmedoidspp run for this k
        fprintf('%s\n',['rep = ' num2str(g) ', k = ' num2str(k(h)) ...
            ' clustering computed in ' num2str(toc(tclust)) ' seconds.']);
    end
end

clear g h i j Trace tfreq numk numnrep numsamplesize;

%% Save error
save dami_random1.mat X err errpam k n nrep p plusplus reps ...
    samplesize windowsizes ws wspam;

%% Clear workspace
clear;

%% Experiment 2: solution stability under data randomization
% Generate 1 data set with n = 1999, p = 0.1 and cluster reps random
% permutations of it, for every value in k.

reps = 100;
n = 1999;
p = 0.1;
windowsizes = 1:1000;
k = 1:4;
nrep = 80;
samplesize = 80;
plusplus = true;

% Inner loop bound follows k. It is stored in a plain variable before the
% parfor because the range of a for-loop nested in a parfor must be given
% by constants or broadcast variables when its index is used to index a
% sliced output.
numk = numel(k);

err = zeros(reps,numk);
ws = cell(reps,numk);

% Generate random data
X = simulateFixedrate(n, p);

parfor i = 1:reps
    % Randomize data: random permutation of the generated sequence
    Xi = X(randperm(n)); %#ok<PFBNS>

    % Compute trace matrix
    tfreq = tic;
    Trace = computeFrequencyMatrix(Xi, 1, windowsizes);
    fprintf('%s\n',['Frequency matrix ' num2str(i) ' computed in ' ...
        num2str(toc(tfreq)) ' seconds.']);

    % Compute clustering for every k
    for j = 1:numk
        tclust = tic;
        [ws{i,j},err(i,j)] = clara(Trace,k(j),nrep,samplesize,plusplus); %#ok<PFBNS>
        fprintf('%s\n',['k = ' num2str(k(j)) ' clustering computed in ' ...
            num2str(toc(tclust)) ' seconds.']);
    end
end

clear i Xi Trace j numk;

%% Save error
save dami_random2.mat X err k n nrep p plusplus reps samplesize ...
    windowsizes ws;

%% Clear workspace
clear;

%% Experiment 3: finding the required number of samples
% For every entry of ncols, build a sampled frequency matrix of X and run
% clara on it, reps times. X is read from example.mat; the commented-out
% call below shows the simulator invocation it stands in for.
% NOTE(review): the original header stated p = 0.01, the code sets
% p = 0.1 -- confirm which value describes the data in example.mat.

% Data parameters (set before the load, as in the original ordering)
n = 100000;
p = 0.1;
c = 16;

% Clustering parameters
k = 3;
nrep = 400;
samplesize = 100;
plusplus = true;
reps = 100;
ncols = 2.^(0:12);
windowsizes = 1:6250;

% X is assigned before the load so that it exists as a variable before
% load creates it dynamically.
X = [];
% X = simulateVariablerate(n, p, c);
load example.mat;

% Result containers: one row per ncols entry, one column per repetition
ws = cell(numel(ncols),reps);
err = zeros(numel(ncols),reps);

for colidx = 1:numel(ncols)
    parfor rep = 1:reps
        % Sampled trace matrix for the current number of columns
        tic;
        Trace = computeFrequencyMatrixSample(X, 1, windowsizes, ncols(colidx)); %#ok<PFBNS>
        elapsed = toc;
        msg = ['Frequency matrix ' num2str(colidx) ' computed in ' ...
            num2str(elapsed) ' seconds.'];
        fprintf('%s\n',msg);

        % Clustering of that trace matrix
        tic;
        [ws{colidx,rep},err(colidx,rep)] = clara(Trace,k,nrep,samplesize,plusplus);
        elapsed = toc;
        msg = ['j = ' num2str(rep) ' clustering computed in ' ...
            num2str(elapsed) ' seconds.'];
        fprintf('%s\n',msg);
    end
end

clear colidx rep Trace;

%% Save error
save dami_random3.mat c err k n ncols nrep p plusplus reps samplesize ...
    windowsizes ws;

%% Clear workspace
clear;

%% Experiment 4: finding the scale in a variable rate process
% Runs clara once on the sampled frequency matrix of X and once on that of
% X_permuted. Both sequences are expected to be provided by example.mat;
% the commented-out call below shows the simulator invocation X stands in
% for.
% NOTE(review): the original header stated p = 0.01, the code sets
% p = 0.1 -- confirm which value describes the data in example.mat.

windowsizes = 1:20000;
ncols = 1000;
k = 3;
nrep = 40;
samplesize = 40;
plusplus = true;

% Load data
X = [];
n = 100000;
p = 0.1;
c = 16;
% X = simulateVariablerate(n, p, c);
load example.mat;

% Fail early with a clear message instead of a late undefined-variable or
% empty-input error if example.mat lacks the expected variables.
if isempty(X)
    error('dami_random:missingData', ...
        'example.mat did not provide a non-empty variable X.');
end
if ~exist('X_permuted','var')
    error('dami_random:missingData', ...
        'example.mat did not provide the variable X_permuted.');
end

% Result containers, initialized identically for original and permuted data
err = zeros(1,1);
ws = cell(1,1);
err_perm = zeros(1,1);
ws_perm = cell(1,1);

% Compute trace matrix
tfreq = tic;
Trace = computeFrequencyMatrixSample(X, 1, windowsizes, ncols);
fprintf('%s\n',['Frequency matrix computed in ' num2str(toc(tfreq)) ' seconds.']);

% Compute clustering
tclust = tic;
[ws{1}, err(1)] = clara(Trace,k,nrep,samplesize,plusplus);
fprintf('%s\n',['k = ' num2str(k) ' clustering computed in ' ...
    num2str(toc(tclust)) ' seconds.']);

% Compute trace matrix of the permuted data
tfreq = tic;
Trace = computeFrequencyMatrixSample(X_permuted, 1, windowsizes, ncols);
fprintf('%s\n',['Frequency matrix computed in ' num2str(toc(tfreq)) ' seconds.']);

% Compute clustering of the permuted data
tclust = tic;
[ws_perm{1}, err_perm(1)] = clara(Trace,k,nrep,samplesize,plusplus);
fprintf('%s\n',['k = ' num2str(k) ' clustering computed in ' ...
    num2str(toc(tclust)) ' seconds.']);

clear Trace tfreq tclust;

%% Save error
save dami_random4.mat c err err_perm k n ncols nrep p plusplus ...
    samplesize windowsizes ws ws_perm;