function collectRecombinationStatistics(rootDirectory, datasetName)
    % COLLECTRECOMBINATIONSTATISTICS Collect per-gene recombination summaries.
    %
    % Inputs:
    %   rootDirectory - Path prefix. It is concatenated directly with
    %                   datasetName, so it should end with a path separator.
    %   datasetName   - Data set name; results are read from
    %                   [rootDirectory datasetName 'Results'] and summaries
    %                   are read from / written to
    %                   [rootDirectory datasetName 'Summaries'].
    %
    % The gene list is read from 'geneNames.mat' (variable geneNames) in the
    % summaries directory. The enabled steps then
    %   1) report genes whose 'recombinations_ancestral.txt' is missing or empty,
    %   2) save nSnps / nStrains / nClusters / nLineages per gene to
    %      [datasetName 'GeneStatistics.mat'],
    %   3) save the ancestral recombination counts to
    %      'numAncestralRecombinations.mat',
    %   4) save the recent recombination counts to
    %      'numRecentRecombinations.mat',
    %   5) write everything as a text table to [datasetName '_recSummaries.txt'],
    %   6) if 'globalNames.mat' exists, accumulate the shared-ancestry matrices
    %      into 'proportionSharedAncestry.mat'.
    % A value of -1 in any per-gene statistic means "no usable result".
    %
    % Each step is wrapped in an "if 1==1" / "if 1==0" switch so that it can
    % be enabled or disabled by hand; steps reload what they need from disk.
    
    delimiter = getDelimiter();
    rootToResults = [rootDirectory datasetName 'Results' delimiter];
    rootToSummaries = [rootDirectory datasetName 'Summaries' delimiter];
    
    % Load into a struct so that no unexpected variables appear in the
    % workspace.
    loaded = load([rootToSummaries 'geneNames.mat'], 'geneNames');
    geneNames = loaded.geneNames;
    nGenes = length(geneNames);
    
    % Check if result files exist, and have size>0:
    if 1==1
        
        for geneIndex = 1:nGenes
            
            nameToRun = geneNames{geneIndex};
            resultDir = [rootToResults nameToRun delimiter];
            
            % This is the file that is created last in the analysis
            fileToCheck = [resultDir 'output' delimiter 'recombinations_ancestral.txt'];
            
            % exist(...,'file') returns 2 only for a regular file; without
            % the qualifier a variable or function name could match as well.
            if exist(fileToCheck, 'file') ~= 2
                disp([num2str(geneIndex) ': ' nameToRun ' does not exist.']);
            else
                fileInfo = dir(fileToCheck);
                if fileInfo.bytes == 0
                    disp([num2str(geneIndex) ': ' nameToRun ' is empty.']);
                end
            end
        end
    end
    
    
    % Check the number of SNPs etc. in each alignment
    if 1==1
        
        % -1 marks genes without a usable result file.
        nSnps = -1 .* ones(1, nGenes);
        nStrains = -1 .* ones(1, nGenes);
        nClusters = -1 .* ones(1, nGenes);
        nLineages = -1 .* ones(1, nGenes);
        
        for geneIndex = 1:nGenes
            if rem(geneIndex,20)==0
                disp([num2str(geneIndex) '/' num2str(nGenes)]);
            end
            
            nameToRun = geneNames{geneIndex};
            resultDir = [rootToResults nameToRun delimiter];
            resultFile = [resultDir nameToRun '_res.mat'];
            
            if exist(resultFile, 'file') ~= 2
                continue
            end
            
            % Only uData is needed here; reading it into a struct also
            % prevents a uData left over from the previous gene from being
            % reused when the file does not contain one.
            loaded = load(resultFile, 'uData');
            if isfield(loaded, 'uData') && isfield(loaded.uData, 'groupedPartition')
                uData = loaded.uData;
                nSnps(geneIndex) = length(uData.snpPositions);
                nStrains(geneIndex) = length(uData.partition);
                nClusters(geneIndex) = max(uData.partition);
                nLineages(geneIndex) = max(uData.groupedPartition);
            end
            
        end
        
        save([rootToSummaries datasetName 'GeneStatistics.mat'], 'nSnps', 'nStrains', 'nClusters', 'nLineages');
        
    end
    
    
    % Count the number of ancestral recombinations
    if 1==1
        
        disp('Counting ancestral recombinations');
        numAncestralRecombinations = -1 .* ones(nGenes,1);
        for geneIndex = 1:nGenes
            
            nameToRun = geneNames{geneIndex};
            
            if rem(geneIndex,20)==0
                disp([num2str(geneIndex) '/' num2str(nGenes)]);
            end
            
            resultDir = [rootToResults nameToRun delimiter];
            ancestralResFile = [resultDir 'output' delimiter 'recombinations_ancestral.txt'];
            
            if exist(ancestralResFile, 'file') == 2
                % Two lines are subtracted from the line count
                % (presumably header lines - TODO confirm against the writer).
                numAncestralRecombinations(geneIndex) = ...
                    countLinesInTextFile(ancestralResFile) - 2;
            end
        end
        save([rootToSummaries 'numAncestralRecombinations.mat'],'numAncestralRecombinations');
    end
    
    
    % Count the number of recent recombinations
    if 1==1
        
        disp('Counting recent recombinations');
        numRecentRecombinations = -1.*ones(nGenes,1);
        
        for geneIndex = 1:nGenes
            
            if rem(geneIndex,20)==0
                disp([num2str(geneIndex) '/' num2str(nGenes)]);
            end
            
            nameToRun = geneNames{geneIndex};
            resultDir = [rootToResults nameToRun delimiter];
            resultFile = [resultDir nameToRun '_res.mat'];
            
            if exist(resultFile, 'file') == 2
                loaded = load(resultFile, 'uData');
                if isfield(loaded, 'uData') && isfield(loaded.uData, 'groupedPartition')
                    res = countRecentRecombinations(loaded.uData);
                    numRecentRecombinations(geneIndex) = res.nRecent;
                end
            end
            
        end
        save([rootToSummaries 'numRecentRecombinations.mat'],'numRecentRecombinations');
    end
    
    
    
    % Write collected results to a file
    if 1==1

        % Reload from disk so that this step also works when the counting
        % steps above are switched off.
        loaded = load([rootToSummaries 'numAncestralRecombinations.mat'], 'numAncestralRecombinations');
        numAncestralRecombinations = loaded.numAncestralRecombinations;
        loaded = load([rootToSummaries 'numRecentRecombinations.mat'], 'numRecentRecombinations');
        numRecentRecombinations = loaded.numRecentRecombinations;
        loaded = load([rootToSummaries datasetName 'GeneStatistics.mat'], 'nSnps', 'nStrains', 'nClusters', 'nLineages');
        
        % One row per gene. (Named summaryTable because 'table' is a
        % built-in in newer MATLAB releases.)
        summaryTable = [loaded.nSnps(:), loaded.nStrains(:), loaded.nClusters(:), loaded.nLineages(:), ...
            numAncestralRecombinations(:), numRecentRecombinations(:)];
        
        maxNameLength = max(cellfun(@length, geneNames));
        summaryFile = [rootToSummaries datasetName '_recSummaries.txt'];
        fid = fopen(summaryFile,'w');
        if fid == -1
            error('collectRecombinationStatistics:fileOpenFailed', ...
                'Could not open %s for writing.', summaryFile);
        end
        
        % The name column is maxNameLength+2 characters wide in both the
        % header and the data rows.
        fprintf(fid, ['COG' blanks(maxNameLength - 1) 'SNPs      sequences clusters  lineages  ancestral recent\n']);
        rowFormat = ['%-' num2str(maxNameLength+2) 's%-10.0f%-10.0f%-10.0f%-10.0f%-10.0f%-10.0f\n'];
        for i=1:size(summaryTable,1)
            fprintf(fid, rowFormat, geneNames{i}, summaryTable(i,:));
        end
        fclose(fid);
        
        % Total number of recent recombinations; the -1 placeholders of
        % genes without results are excluded from the total.
        disp(num2str(sum(numRecentRecombinations(numRecentRecombinations >= 0))));
    end
    
    
    % Compute the proportion of shared ancestry
    if 1==1 && exist([rootToSummaries 'globalNames.mat'], 'file')
        
        disp('Computing shared ancestry');
        loaded = load([rootToSummaries 'globalNames.mat'], 'globalNames');
        globalNames = loaded.globalNames;
        
        globalLengthShared = zeros(length(globalNames));
        globalLengthCompared = zeros(length(globalNames));
        
        for geneIndex = 1:nGenes
            
            disp([num2str(geneIndex) '/' num2str(nGenes)]);
            nameToRun = geneNames{geneIndex};
            
            computeProportionPrivateAncestry(nameToRun, globalNames, [], 'shared', rootToResults);
            % This function creates file nameToRun_ancestry_shared.mat in
            % the result folder of the gene
            
            resultFileName = [rootToResults nameToRun delimiter nameToRun '_ancestry_shared.mat'];
            sharedResult = load(resultFileName, 'lengthShared', 'lengthCompared');
            
            globalLengthShared = globalLengthShared + double(sharedResult.lengthShared);
            globalLengthCompared = globalLengthCompared + double(sharedResult.lengthCompared);
            
        end
        
        save([rootToSummaries 'proportionSharedAncestry.mat'], 'globalLengthShared', 'globalLengthCompared');
    end
    
    
    % Compute interaction intensity (currently switched off)
    if 1==0
        
        disp('Computing interaction intensity');
        loaded = load([rootToSummaries 'globalNames.mat'], 'globalNames');
        globalNames = loaded.globalNames;
        nGlobalStrains = length(globalNames);
        
        globalNumEvents = zeros(nGlobalStrains);
        globalLengthInDifferentLineages = zeros(nGlobalStrains);
        
        for geneIndex = 1:nGenes
            
            disp([num2str(geneIndex) '/' num2str(nGenes)]);
            nameToRun = geneNames{geneIndex};
            
            computeProportionPrivateAncestry(nameToRun, globalNames, [], 'interaction', rootToResults);
            % This function creates file nameToRun_interaction.mat in
            % the result folder of the gene
            
            resultFileName = [rootToResults nameToRun delimiter nameToRun '_interaction.mat'];
            interactionResult = load(resultFileName, 'numEvents', 'lengthInDifferentLineages');
            
            globalNumEvents = globalNumEvents + double(interactionResult.numEvents);
            globalLengthInDifferentLineages = globalLengthInDifferentLineages + double(interactionResult.lengthInDifferentLineages);
            
        end
        
        save([rootToSummaries 'interactionMatrix.mat'], 'globalNumEvents', 'globalLengthInDifferentLineages');
    end
    
    % Update the uData.snpDataFileName, 
    % because in the cluster the root directory is different 
    % from work station. (Currently switched off.)
    if 1==0
        for geneIndex = 1:nGenes
            
            disp([num2str(geneIndex) '/' num2str(nGenes)]);
            nameToRun = geneNames{geneIndex};
            
            resultDir = [rootToResults nameToRun delimiter];
            
            fileToCheck = [resultDir nameToRun '_res.mat'];
            loaded = load(fileToCheck, 'uData');
            uData = loaded.uData;
            
            uData.outputDir = [resultDir 'output'];
            uData.snpDataFileName = [resultDir nameToRun '_res_snpData.mat'];
            save(fileToCheck, 'uData');
            
        end
    end
    
    
%     % Compute the proportion of private ancestry (ONLY FOR SPARC CURRENTLY)
%     if 1==0
%         
%         load('nameConversion.mat'); % 'ersNames','faNames','errNames'
%         globalNames = ersNames;
%         clear ersNames faNames errNames;
%         
%         globalLengthPrivate = zeros(length(globalNames),1);
%         globalLengthInvestigated = zeros(length(globalNames),1);
%         
%         % Find out global BAPS partition
%         metaData = readMetaData; % origNames, baps, errNames
%         % Note: isequal(metaData.errNames, errNames) is =TRUE.
%         % Therefore, the baps gives immediately the BAPS partition for the
%         % globalNames.
%         globalPartition = metaData.baps(1:length(globalNames));
%         
%         for geneIndex = 1:length(geneNames)
%             
%             disp([num2str(geneIndex) '/' num2str(length(geneNames))]);
%             nameToRun = geneNames{geneIndex};
%             
%             % Compute the results
%             %computeProportionPrivateAncestry(nameToRun, globalNames, globalPartition, 'private');
%             
%             % Load the results
%             resultDir = '/triton/ics/scratch/mi/pemartti/Brat4/sparc_results/';
%             resultFileName = [resultDir 'results/' nameToRun '/' nameToRun '_private_ancestry.mat'];
%             load(resultFileName); % lengthPrivate lengthInvestigated
%             globalLengthPrivate = globalLengthPrivate + lengthPrivate;
%             globalLengthInvestigated = globalLengthInvestigated + lengthInvestigated;
%             
%         end
%         save('/triton/ics/scratch/mi/pemartti/Brat4/source/sparc_analysis/resultSummaries/proportionPrivateAncestry.mat', 'globalLengthPrivate', 'globalLengthInvestigated');
%         
%     end
%     
%     
%     % Compute the distances between SCs (ONLY FOR SPARC CURRENTLY)
%     if 1==0
%         
%         load('nameConversion.mat'); % 'ersNames','faNames','errNames'
%         globalNames = ersNames;
%         clear ersNames faNames errNames;
%         
%         collectedScDistances = zeros(15,15,length(geneNames));
%         
%         metaData = readMetaData; % origNames, baps, errNames
%         % Note: isequal(metaData.errNames, errNames) is =TRUE.
%         % Therefore, the baps field gives immediately the BAPS partition 
%         % for the globalNames.
%         globalPartition = metaData.baps(1:length(globalNames));
%         
%         for geneIndex = 1:length(geneNames)
%             
%             disp([num2str(geneIndex) '/' num2str(length(geneNames))]);
%             nameToRun = geneNames{geneIndex};
%             
%             collectedScDistances(:,:,geneIndex) = computeScDistances(nameToRun, globalNames, globalPartition);
%             
%         end
%         
%         save('/triton/ics/scratch/mi/pemartti/Brat4/source/sparc_analysis/resultSummaries/scDistances.mat', 'collectedScDistances');
%     end
    
    
end


function numLines = countLinesInTextFile(fileName)
    % Count the lines that fgetl can read from a text file.
    % Raises an error if the file cannot be opened.
    
    fid = fopen(fileName);
    if fid == -1
        error('collectRecombinationStatistics:fileOpenFailed', ...
            'Could not open %s for reading.', fileName);
    end
    % Close the file even if reading fails part-way through.
    closeFileWhenDone = onCleanup(@() fclose(fid)); %#ok<NASGU>
    
    numLines = 0;
    while ischar(fgetl(fid))
        numLines = numLines + 1;
    end
end




function [numAsDonor, numAsRecipient, numAsRecipientFromOutside] = addToTheGlobalCounts(...
    numAsDonor, numAsRecipient, numAsRecipientFromOutside, globalNames, ...
    localNumAsDonor, localNumAsRecipient, localNumAsRecipientFromOutside, ...
    rawLocalNames)
    % Add per-sequence counts of one gene to the per-strain global counts.
    %
    % Inputs:
    %   numAsDonor, numAsRecipient, numAsRecipientFromOutside
    %       - global count vectors, indexed like globalNames.
    %   globalNames
    %       - cell array of strain names.
    %   localNumAsDonor, localNumAsRecipient, localNumAsRecipientFromOutside
    %       - count vectors indexed like rawLocalNames.
    %   rawLocalNames
    %       - cell array of sequence labels; the part before the first
    %         underscore (see extractShortLabels) is matched against
    %         globalNames.
    %
    % Outputs: the three global vectors with the local counts added. Local
    % sequences that map to the same strain are summed before being added.
    %
    % Raises addToTheGlobalCounts:unknownStrain if a local name is not
    % found in globalNames.
    
    localNames = extractShortLabels(rawLocalNames);
    
    % Find the indices of local names within global names
    [isKnown, localIndices] = ismember(localNames, globalNames);
    
    % ismember reports 0 for names that are not found, and 0 is not a valid
    % subscript; fail with a message that names the offending labels.
    if ~all(isKnown)
        unknownNames = unique(localNames(~isKnown));
        error('addToTheGlobalCounts:unknownStrain', ...
            'Local strain name(s) not found in globalNames: %s', ...
            sprintf('%s ', unknownNames{:}));
    end
    
    % One pass per distinct strain; duplicates are summed via the mask.
    % Updating element by element keeps this independent of whether the
    % global vectors are rows or columns.
    uniqueLocalIndices = unique(localIndices);
    for elementIndex = 1:length(uniqueLocalIndices)
        globalIndexNow = uniqueLocalIndices(elementIndex);
        belongsToStrain = (localIndices == globalIndexNow);
        
        numAsDonor(globalIndexNow) = numAsDonor(globalIndexNow) + ...
            sum(localNumAsDonor(belongsToStrain));
        numAsRecipient(globalIndexNow) = numAsRecipient(globalIndexNow) + ...
            sum(localNumAsRecipient(belongsToStrain));
        numAsRecipientFromOutside(globalIndexNow) = numAsRecipientFromOutside(globalIndexNow) + ...
            sum(localNumAsRecipientFromOutside(belongsToStrain));
    end
    
end



function globalRecombinationFlow = addToTheGlobalRecombinationFlow(...
    globalRecombinationFlow, globalNames, localRecombinationFlow, rawLocalNames)
    % Add the recombination flow matrix of one gene to the global matrix.
    %
    % Inputs:
    %   globalRecombinationFlow - matrix with length(globalNames)+1 rows;
    %                             columns are indexed like globalNames.
    %   globalNames             - cell array of strain names.
    %   localRecombinationFlow  - matrix with length(rawLocalNames)+1 rows
    %                             and one column per local sequence.
    %   rawLocalNames           - cell array of sequence labels.
    %
    % Output: globalRecombinationFlow with the local flow added. Rows and
    % columns of local sequences that belong to the same strain are summed
    % first. The extra last row of the local matrix is added to the extra
    % last row of the global matrix (presumably flow from outside the
    % sample - TODO confirm against the producer of the matrix).
    %
    % Raises addToTheGlobalRecombinationFlow:unknownStrain if a local name
    % is not found in globalNames.
    
    % Remove the second part from local names.
    % ('rawLocalNames' look like 'ERS069993_02342')
    localNames = extractShortLabels(rawLocalNames);
    
    nGlobal = length(globalNames);
    nLocal = length(localNames);
    
    % Find the indices of local names within global names
    [isKnown, localIndices] = ismember(localNames, globalNames);
    
    % ismember reports 0 for names that are not found, and 0 is not a valid
    % subscript; fail with a message that names the offending labels.
    if ~all(isKnown)
        unknownNames = unique(localNames(~isKnown));
        error('addToTheGlobalRecombinationFlow:unknownStrain', ...
            'Local strain name(s) not found in globalNames: %s', ...
            sprintf('%s ', unknownNames{:}));
    end
    
    % Force a column so that the vertical concatenation below is valid.
    localIndices = localIndices(:);
    
    % Distinct strains present in this gene
    uniqueLocalIndices = unique(localIndices);
    nUnique = length(uniqueLocalIndices);
    
    % Sum the columns of the local flow that correspond to the same strain
    columnsMerged = zeros(nLocal+1, nUnique);
    for columnIndex = 1:nUnique
        sameStrain = (localIndices == uniqueLocalIndices(columnIndex));
        columnsMerged(:, columnIndex) = sum(localRecombinationFlow(:, sameStrain), 2);
    end
    
    % Sum the rows that correspond to the same strain. The mask has nLocal
    % entries, so it never selects the extra last row.
    localFlowNew = zeros(nUnique+1, nUnique);
    for rowIndex = 1:nUnique
        sameStrain = (localIndices == uniqueLocalIndices(rowIndex));
        localFlowNew(rowIndex,:) = sum(columnsMerged(sameStrain,:), 1);
    end
    % The extra last row is carried over unchanged.
    localFlowNew(nUnique+1,:) = columnsMerged(nLocal+1,:);
    
    targetRows = [uniqueLocalIndices; nGlobal+1];
    globalRecombinationFlow(targetRows, uniqueLocalIndices) = ...
        globalRecombinationFlow(targetRows, uniqueLocalIndices) + localFlowNew;
    
end


function newVector = increaseSizeIfNeeded(vector, newElementIndex)
    % Grow a vector with zeros so that newElementIndex is a valid index.
    %
    % Inputs:
    %   vector          - numeric vector (column, row, or empty).
    %   newElementIndex - index that is about to be written.
    %
    % Output:
    %   newVector - vector itself if newElementIndex already fits;
    %               otherwise vector padded with zeros. The length is
    %               doubled, or extended to newElementIndex when doubling
    %               is not enough (e.g. for an empty vector).
    
    currentLength = length(vector);
    if newElementIndex <= currentLength
        newVector = vector;
        return
    end
    
    % Doubling keeps repeated growth cheap; the max() guarantees that the
    % requested index really fits afterwards.
    numToAdd = max(currentLength, newElementIndex - currentLength);
    
    if isrow(vector) && ~isscalar(vector)
        % Keep the orientation of row vectors.
        newVector = [vector, zeros(1, numToAdd)];
    else
        newVector = [vector; zeros(numToAdd, 1)];
    end
end


function nCopies = computeNCopies(shortLabels, ersNames)
    % Count how many entries of shortLabels equal each name in ersNames.
    %
    % Inputs:
    %   shortLabels - labels of the sequences, one per sequence.
    %   ersNames    - names to count occurrences for.
    %
    % Output:
    %   nCopies - column vector with length(ersNames) elements; element k
    %             is the number of entries in shortLabels that match
    %             ersNames{k}.
    
    nCopies = zeros(length(ersNames),1);
    
    for labelIndex = 1:length(shortLabels)
        % Every position of ersNames that matches the current label gets
        % one more copy.
        matchesLabel = strcmp(ersNames, shortLabels(labelIndex));
        nCopies(matchesLabel) = nCopies(matchesLabel) + 1;
    end
    
end


function nDistinctCopies = computeNDistinctCopies(shortLabels, ersNames, snpData, uData)
    % Count the non-identical copies that each strain has in snpData.
    %
    % Inputs:
    %   shortLabels - label of each row of snpData.
    %   ersNames    - strain names to report on.
    %   snpData     - matrix with one row per entry of shortLabels.
    %   uData       - not used by this function.
    %
    % Output:
    %   nDistinctCopies - column vector with length(ersNames) elements:
    %                     0 if the strain has no rows, 1 if it has exactly
    %                     one row, otherwise the number of unique rows
    %                     among the rows of the strain.
    
    numNames = length(ersNames);
    nDistinctCopies = zeros(numNames,1);
    
    for nameIndex = 1:numNames
        
        % Rows of snpData that carry the label of this strain
        rowsOfStrain = find(strcmp(shortLabels, ersNames(nameIndex)));
        numRows = length(rowsOfStrain);
        
        if numRows > 1
            distinctRows = unique(snpData(rowsOfStrain,:), 'rows');
            nDistinctCopies(nameIndex) = size(distinctRows,1);
        elseif numRows == 1
            nDistinctCopies(nameIndex) = 1;
        end
    end
    
end


function shortLabels = extractShortLabels(strainLabels)
    % Strip everything from the first underscore onwards from each label.
    %
    % Input:
    %   strainLabels - cell array of character vectors, for example
    %                  'ERS069993_02342'.
    %
    % Output:
    %   shortLabels - column cell array of the same length; each element is
    %                 the part of the label before its first underscore
    %                 ('ERS069993' in the example). A label without an
    %                 underscore is returned unchanged.
    
    nStrains = length(strainLabels);
    shortLabels = cell(nStrains,1);
    
    for strainIndex = 1:nStrains
        labelNow = strainLabels{strainIndex};
        firstUnderscore = find(labelNow == '_', 1);
        if isempty(firstUnderscore)
            % No suffix to remove; indexing the empty find result would
            % otherwise fail.
            shortLabels{strainIndex} = labelNow;
        else
            shortLabels{strainIndex} = labelNow(1:firstUnderscore-1);
        end
    end
    
end



function writeOutputs(outputFile, outputDir, uData)
    % Write the text outputs of one analysis into outputDir.
    %
    % Inputs:
    %   outputFile - passed on to writePartition and
    %                writeRecombinationsToTextFile as their first argument.
    %   outputDir  - directory that receives the text files (without a
    %                trailing separator; uData.delimiter is inserted).
    %   uData      - struct; only the field 'delimiter' is read here.
    %
    % Files written: 'lineage_information.txt',
    % 'recombinations_recent.txt' (output type 1) and
    % 'recombinations_ancestral.txt' (output type 2).
    
    pathPrefix = [outputDir uData.delimiter];
    
    % --- Partition ---
    % Writes both the lineage and the BAPS cluster
    writePartition(outputFile, [pathPrefix 'lineage_information.txt']);
    
    % --- Recent and ancestral recombinations ---
    % The position in this list is the output type handed to the writer.
    recombinationFileNames = {'recombinations_recent', 'recombinations_ancestral'};
    
    for outputType = 1:length(recombinationFileNames)
        segmentsOutputFile = [pathPrefix recombinationFileNames{outputType} '.txt'];
        writeRecombinationsToTextFile(outputFile, segmentsOutputFile, outputType);
    end

end

