function startAncestryReconstruction(rootDirectory, datasetName, treeFileName)
    % Reconstructs ancestral population structures for every gene on a given
    % tree and counts the recombinations implied by the reconstruction.
    %
    % Inputs:
    %   rootDirectory - char; it is concatenated directly with datasetName, so
    %                   it must already end with a file separator.
    %   datasetName   - char; the working folder is
    %                   [rootDirectory datasetName 'Summaries'].
    %   treeFileName  - name of the tree file inside the working folder
    %                   (read with phytreeread).
    %
    % Files read from the working folder:
    %   globalNames.mat            (variable globalNames)
    %   collectedPopStructures.mat (collectedPopStructures, collectedSnpPositions,
    %                               collectedGeneLengths, collectedMostCommonClusters)
    %   geneNames.mat              (variable geneNames)
    % Files written:
    %   ancestralStructures.mat, nRecInGene.mat and
    %   <datasetName>_reconstructed_num_rec.txt
    %
    % Self-tests available in this file (run them manually when needed):
    %   testAncestryReconstruction, testRecombinationPatternIdentification,
    %   testRecCountingFromPotentialDonors, testDonorIdentification
    %
    % NOTE(review): getDelimiter is defined elsewhere; presumably it returns
    % the platform file separator. rootToSummaries already ends with it, so the
    % paths built below contain a doubled separator. They are kept exactly as
    % before so that the same files are addressed.
    
    delimiter = getDelimiter;
    rootToSummaries = [rootDirectory datasetName 'Summaries' delimiter];
    
    treeFileName = [rootToSummaries delimiter treeFileName];
    tree = phytreeread(treeFileName);
    nLeaves = get(tree,'NumLeaves');
    
    % Stage switches (previously written as "if 1==0" / "if 1==1").
    runSinglePositionDemo = false;
    runReconstruction = true;
    runRecombinationCounting = true;
    
    
    % FOR SINGLE POSITION
    if runSinglePositionDemo
        highlightData = load([rootToSummaries delimiter 'leafNodesToHighlight.mat'], 'toHighlight');
        
        leafLabels = ones(nLeaves, 1);
        leafLabels(highlightData.toHighlight) = 2;
        
        nodeLabels = reconstructAncestryForPosition(tree, leafLabels);
        
        toHighlight = find(nodeLabels == 1);
        view(tree, toHighlight);
    end
    
    
    % RECONSTRUCTION FOR POPULATION STRUCTURE IN SUBTREE
    if runReconstruction
        
        % Load into structs so that variables stored in the MAT-files can
        % never silently overwrite locals such as 'tree'.
        nameData = load([rootToSummaries delimiter 'globalNames.mat'], 'globalNames');
        popData = load([rootToSummaries delimiter 'collectedPopStructures.mat'], ...
            'collectedPopStructures', 'collectedSnpPositions', ...
            'collectedGeneLengths', 'collectedMostCommonClusters');
        
        % Select the rows of each population structure that correspond to
        % the leaves of the (sub)tree, in leaf order.
        namesOfStrainsToUse = get(tree, 'LeafNames');
        [isKnown, pos] = ismember(namesOfStrainsToUse, nameData.globalNames);
        if ~all(isKnown)
            % A zero in 'pos' would otherwise surface as an obscure
            % indexing error inside the gene loop.
            missingNames = namesOfStrainsToUse(~isKnown);
            error('startAncestryReconstruction:unknownStrain', ...
                'Tree leaves not found in globalNames: %s', ...
                sprintf('%s ', missingNames{:}));
        end
        
        nGenes = length(popData.collectedPopStructures);
        
        ancestralStructures = cell(1,nGenes);
        for geneIndex = 1:nGenes
            
            origPopStructure = popData.collectedPopStructures{geneIndex}(pos,:);
            
            ancestralStructures{geneIndex} = ...
                reconstructAncestry(origPopStructure, ...
                popData.collectedSnpPositions{geneIndex}, ...
                popData.collectedGeneLengths(geneIndex), tree, ...
                popData.collectedMostCommonClusters(geneIndex));
        end
        
        save([rootToSummaries delimiter 'ancestralStructures.mat'], 'ancestralStructures');
        
    end
    
    
    % COUNTING RECOMBINATIONS FROM THE RECONSTRUCTION
    if runRecombinationCounting
        
        % Re-read from disk so this stage also works when the
        % reconstruction stage above is switched off.
        savedData = load([rootToSummaries delimiter 'ancestralStructures.mat'], 'ancestralStructures');
        ancestralStructures = savedData.ancestralStructures;
        
        nGenes = length(ancestralStructures);
        nRecInGene = -1.*ones(nGenes,1); % -1 marks "not computed"
        
        for geneIndex = 1:nGenes
            nRecInGene(geneIndex) = countRecombinationsFromAncestralStructures(ancestralStructures{geneIndex}, tree);
        end
        
        save([rootToSummaries delimiter 'nRecInGene.mat'], 'nRecInGene');
        
        geneData = load([rootToSummaries delimiter 'geneNames.mat'], 'geneNames');
        geneNames = geneData.geneNames;
        
        maxNameLength = max(cellfun(@length, geneNames));
        reportPath = [rootToSummaries datasetName '_reconstructed_num_rec.txt'];
        [fid, openMessage] = fopen(reportPath,'w');
        if fid < 0
            error('startAncestryReconstruction:cannotOpenReport', ...
                'Cannot open %s for writing: %s', reportPath, openMessage);
        end
        % Close the file on every exit path, including errors while writing.
        closeReport = onCleanup(@() fclose(fid));
        
        % The header text (including its spelling) is kept unchanged so that
        % existing readers of this report are not affected.
        fprintf(fid, ['COG' blanks(maxNameLength - 1) '#Rec (reconstructred)\n']);
        for geneIndex = 1:length(geneNames)
            fprintf(fid, ['%-' num2str(maxNameLength+2) 's%-10.0f\n'], geneNames{geneIndex}, nRecInGene(geneIndex));
        end
        
    end
end



function testAncestryReconstruction
    % Manual self-test for reconstructAncestryForPosition on a five-leaf tree.
    % Prints 'Test N pass' or 'Test N fail-----------------' for each case.
    
    % Calling reconstructAncestry without arguments returns a struct that
    % exposes the local function as a handle.
    exported = reconstructAncestry;
    reconstructFcn = exported.reconstructAncestryForPosition;
    
    % Tree with 5 leaves (nodes 1-5) and 4 branch nodes (nodes 6-9).
    tree = phytree([1,2; 4,5; 3,6; 7,8]);
    
    % Columns: leaf labels, forced root label, expected node labels,
    % whether to stop in the debugger (keyboard) when the case fails.
    cases = { ...
        [2 2 3 3 2], 2, [2 2 3 3 2 2 2 2 2], false; ...
        [2 2 3 3 2], 3, [2 2 3 3 2 2 3 3 3], false; ...
        [1 1 1 1 1], 2, [1 1 1 1 1 1 1 1 2], true};
    
    for caseIndex = 1:size(cases,1)
        [leafLabels, rootLabel, expectedLabels, stopOnFailure] = cases{caseIndex,:};
        
        nodeLabels = reconstructFcn(tree, leafLabels, rootLabel);
        
        if isequal(nodeLabels, expectedLabels)
            disp(['Test ' num2str(caseIndex) ' pass']);
        else
            disp(['Test ' num2str(caseIndex) ' fail-----------------']);
            if stopOnFailure
                keyboard
            end
        end
    end
    
end



function testRecombinationPatternIdentification
    % Manual self-test for identifyRecombinationSegments, which is obtained
    % as a function handle from countRecombinationsFromAncestralStructures
    % (defined elsewhere). Each expected result has one row per segment; the
    % rows read as [first column, last column, label in the child].
    
    exported = countRecombinationsFromAncestralStructures;
    findSegments = exported.identifyRecombinationSegments;
    
    % Columns: test label, node (parent) structure, child structure,
    % expected segments.
    cases = { ...
        '4', ...
        [1 1 1 2 2 1 1 1 3 3 3 3 3], ...
        [2 2 1 2 2 1 1 4 4 2 2 3 3], ...
        [1 2 2; 8 9 4; 10 11 2]; ...
        '5', ...
        [1 1 1 2 1 2 2], ...
        [1 1 1 2 2 3 4], ...
        [5 5 2; 6 6 3; 7 7 4]; ...
        '6', ...
        [3 2 3], ...
        [2 1 1], ...
        [1 1 2; 2 3 1]; ...
        '6.5', ...
        [2 2 1 1 2 2 2 3 3 1 2 2 1 1 3 3 3 5 2 2], ...
        [1 1 1 1 2 2 2 2 2 1 1 1 1 1 1 1 5 5 5 5], ...
        [1 2 1; 8 9 2; 11 12 1; 15 16 1; 17 17 5; 19 20 5]};
    
    for caseIndex = 1:size(cases,1)
        [testLabel, nodeStructure, childStructure, expectedSegments] = cases{caseIndex,:};
        
        segments = findSegments(nodeStructure, childStructure);
        
        if isequal(segments, expectedSegments)
            disp(['Test ' testLabel ' pass']);
        else
            disp(['Test ' testLabel ' fail-----------------']);
        end
    end
    
end


function testRecCountingFromPotentialDonors
    % Manual self-test for determineTheNumberOfRecombinationsFromPotentialDonors,
    % which is obtained as a function handle from
    % countRecombinationsFromAncestralStructures (defined elsewhere).
    % Prints 'Test N pass' or 'Test N fail-----------------' for each case.
    %
    % Fix: the last case used to print "Test 13", the same label as the case
    % before it, so the two results could not be told apart. It is now
    % labelled "Test 13.5", following the "Test 6.5" convention used in
    % testRecombinationPatternIdentification.
    
    exported = countRecombinationsFromAncestralStructures;
    countFromDonors = exported.determineTheNumberOfRecombinationsFromPotentialDonors;
    
    % Columns: test label, potential donor per segment, expected count.
    % The trailing remarks record the alternative expected values that the
    % original code kept as comments (presumably from an earlier counting
    % rule in which segments without a donor, i.e. zeros, were not counted
    % - TODO confirm).
    cases = { ...
        '7',    [1 1 0 2 2 3 3 3 0 0 2 2], 7; ... % earlier value: 4
        '8',    [1 1 0 2 2 3 3 0 0],       6; ... % earlier value: 3
        '9',    [1 1],                     1; ...
        '10',   [0 1 1],                   2; ... % earlier value: 1
        '11',   [1 1 0],                   2; ... % earlier value: 1
        '12',   [0],                       1; ... % earlier value: 0
        '13',   [0 0 0 0 0],               5; ... % earlier value: 0
        '13.5', [4 4 4 4 4 4 4 4 4],       1};
    
    for caseIndex = 1:size(cases,1)
        [testLabel, potentialDonors, expectedCount] = cases{caseIndex,:};
        
        nRec = countFromDonors(potentialDonors);
        
        if nRec ~= expectedCount
            disp(['Test ' testLabel ' fail-----------------']);
        else
            disp(['Test ' testLabel ' pass']);
        end
    end
    
end


function testDonorIdentification
    % Manual self-test for identifyPotentialDonors (obtained, together with
    % identifyRecombinationSegments, as function handles from
    % countRecombinationsFromAncestralStructures, defined elsewhere).
    %
    % Scenario assumed by the original author: NumNodes = 11, NumLeaves = 6.
    % Every test uses an 11-row ancestral structure whose rows 1-7 equal the
    % child (recipient) structure and whose rows 8-9 equal the parent
    % structure; only rows 10 and 11 differ between tests.
    
    exported = countRecombinationsFromAncestralStructures;
    findSegments = exported.identifyRecombinationSegments;
    findDonors = exported.identifyPotentialDonors;
    
    childNow = 7;
    childStructure = [1 1 1 1 2 2 2 2 2 1 1 1 1 1 1 1 5 5 5 5];
    nodeStructure  = [2 2 1 1 2 2 2 3 3 1 2 2 1 1 3 3 3 5 2 2];
    
    segments = findSegments(nodeStructure, childStructure);
    
    % Rows 1-9, shared by all tests.
    sharedRows = [repmat(childStructure, [7 1]); repmat(nodeStructure, [2 1])];
    
    % Candidate donor rows.
    firstThreeDonor  = [1 1 1 1 2 2 2 2 2 1 1 1 1 1 3 3 3 5 2 2]; % could have donated the first 3 segments
    lastThreeDonor   = [2 2 1 1 2 2 2 3 3 1 2 2 1 1 1 1 5 5 5 5]; % could have donated the last 3 segments
    allButLast       = [1 1 1 1 2 2 2 2 2 1 1 1 1 1 1 1 5 5 2 2]; % everything except the last segment
    allButSecondLast = [1 1 1 1 2 2 2 2 2 1 1 1 1 1 1 1 3 5 5 5]; % everything except the second last segment
    
    % Columns: test number, row 10, row 11, expected donor per segment.
    % Original author's notes on the expectations:
    %  - Test 15: the algorithm always picks the most recent possible donor,
    %    therefore 10 and not 11.
    %  - Test 18: row 10 is identical to the recipient, so it could have
    %    donated everything.
    %  - Test 20: the last element is also 0, because the code does not
    %    attempt to identify donors for recombinations of length one
    %    segment; such a segment is counted as one recombination anyway,
    %    since it did not come together with any other segment.
    scenarios = { ...
        14, firstThreeDonor,  lastThreeDonor,  [10 10 10 11 11 11]; ...
        15, firstThreeDonor,  firstThreeDonor, [10 10 10 0 0 0]; ...
        16, lastThreeDonor,   lastThreeDonor,  [0 0 0 10 10 10]; ...
        17, nodeStructure,    nodeStructure,   [0 0 0 0 0 0]; ...
        18, childStructure,   nodeStructure,   [10 10 10 10 10 10]; ...
        19, allButLast,       nodeStructure,   [10 10 10 10 10 0]; ...
        20, allButSecondLast, nodeStructure,   [10 10 10 10 0 0]};
    
    for scenarioIndex = 1:size(scenarios,1)
        [testNumber, rowTen, rowEleven, expectedDonors] = scenarios{scenarioIndex,:};
        
        ancestralStructures = [sharedRows; rowTen; rowEleven];
        possibleDonors = findDonors(segments, childNow, childStructure, ancestralStructures);
        
        if isequal(possibleDonors, expectedDonors)
            disp(['Test ' num2str(testNumber) ' pass']);
        else
            disp(['Test ' num2str(testNumber) ' fail-----------------']);
        end
    end
    
end





function ancestryStructures = reconstructAncestry(popStructure, snpPositions, totalSequenceLength, tree, homeCluster)
    % Reconstructs node labels for every column of a population structure.
    %
    % Inputs:
    %   popStructure        - matrix with one row per tree leaf and one
    %                         column per position; entries are labels.
    %   snpPositions        - not used by this function.
    %   totalSequenceLength - not used by this function.
    %   tree                - tree object queried with get(tree,'NumNodes').
    %   homeCluster         - label passed on as the root label.
    %
    % Output:
    %   ancestryStructures  - NumNodes-by-nColumns matrix of labels, one
    %                         column per column of popStructure.
    %
    % Called without arguments, the function instead returns a struct with
    % a handle to the local function reconstructAncestryForPosition, so
    % that the tests can reach it.
    
    if nargin == 0
        ancestryStructures = struct('reconstructAncestryForPosition', @reconstructAncestryForPosition);
        return
    end
    
    % Many columns are identical, so reconstruct each distinct column
    % pattern only once. Row k of distinctPatterns is one pattern, and
    % patternOfColumn maps every original column to its pattern.
    [distinctPatterns, ~, patternOfColumn] = unique(popStructure', 'rows');
    
    nPatterns = size(distinctPatterns, 1);
    nNodes = get(tree,'NumNodes');
    
    labelsPerPattern = zeros(nNodes, nPatterns);
    for patternIndex = 1:nPatterns
        leafLabelsNow = distinctPatterns(patternIndex, :)';
        labelsPerPattern(:,patternIndex) = reconstructAncestryForPosition(tree, leafLabelsNow, homeCluster);
    end
    
    % Expand back to one column per original position.
    ancestryStructures = labelsPerPattern(:,patternOfColumn);
    
end




function nodeLabels = reconstructAncestryForPosition(tree, origLeafLabels, origRootLabel)
    % Assigns a label to every node of the tree for one position, using a
    % two-pass dynamic programme in which each label change between a
    % parent and a child costs 1.
    %
    % Inputs:
    %   tree           - tree object queried with get(tree,'NumNodes') and
    %                    get(tree,'Pointers').
    %   origLeafLabels - one label per leaf; leaf k is node k.
    %   origRootLabel  - optional label forced on the root (the last node).
    %
    % Output:
    %   nodeLabels     - label of every node, in the original label values.
    %
    % NOTE(review): the upward pass visits nodes in index order and reads
    % the children of a branch node before the node itself, so it relies on
    % children being numbered below their parent - confirm for the trees
    % supplied.

    if nargin == 2
        origRootLabel = [];
    end
    rootIsFixed = ~isempty(origRootLabel);

    nLeaves = length(origLeafLabels);

    % Work with labels recoded to 1:nLabels.
    uniqueLabels = union(origLeafLabels,origRootLabel);
    nLabels = length(uniqueLabels);
    leafLabels = zeros(nLeaves,1);
    for labelIndex = 1:nLabels
        leafLabels(origLeafLabels == uniqueLabels(labelIndex)) = labelIndex;
    end

    if rootIsFixed
        rootLabel = find(uniqueLabels == origRootLabel);
    end

    nNodes = get(tree, 'NumNodes');
    pointers = get(tree, 'Pointers');

    % costs(n,l): cost of the subtree below node n when n carries label l.
    % 1000 acts as "not allowed".
    costs = 1000 .* ones(nNodes,nLabels);

    % UPWARD PASS (from leaves to root)
    for nodeIndex = 1:nNodes

        if nodeIndex <= nLeaves
            % A leaf may only carry its observed label.
            costs(nodeIndex, leafLabels(nodeIndex)) = 0;
            continue
        end

        branchRow = nodeIndex - nLeaves;
        leftCosts = costs(pointers(branchRow, 1), :);
        rightCosts = costs(pointers(branchRow, 2), :);

        % A root label other than the requested one is made prohibitively
        % expensive.
        if nodeIndex == nNodes && rootIsFixed
            rootPenalty = 1000 .* (rootLabel ~= (1:nLabels));
        else
            rootPenalty = zeros(1,nLabels);
        end

        % For each label of this node, the best child label is either the
        % same label (no change) or the overall cheapest one plus 1.
        costs(nodeIndex, :) = ...
            min(leftCosts, min(leftCosts) + 1) + ...
            min(rightCosts, min(rightCosts) + 1) + rootPenalty;

    end


    % DOWNWARD PASS (from root to leaves)
    nodeLabels = zeros(nNodes,1);
    nodeLabels(1:nLeaves) = leafLabels;
    
    parents = identifyParents(pointers, nLeaves, nNodes);
    
    for nodeIndex = nNodes : -1 : (nLeaves+1)

        candidateCosts = costs(nodeIndex,:);
        
        if nodeIndex ~= nNodes
            % Differing from the label already chosen for the parent
            % costs one extra change.
            changePenalty = ones(1,nLabels);
            changePenalty(nodeLabels(parents(nodeIndex))) = 0;
            candidateCosts = candidateCosts + changePenalty;
        end
        
        % min returns the first of several equally cheap labels.
        [~, nodeLabels(nodeIndex)] = min(candidateCosts);
        
    end
    
    % Translate back to the original label values.
    nodeLabels = uniqueLabels(nodeLabels);
    
end




function parents = identifyParents(pointers, nLeaves, nNodes)
    % Returns the parent node index of every node.
    %
    % Inputs:
    %   pointers - one row per branch node, [child1 child2]; row k belongs
    %              to node nLeaves + k.
    %   nLeaves  - number of leaves (nodes 1..nLeaves).
    %   nNodes   - total number of nodes; node nNodes is the root.
    %
    % Output:
    %   parents  - nNodes-by-1 vector; parents(n) is the parent of node n,
    %              and 0 for the root, which has no parent.
    %
    % The parents are filled in with one vectorised assignment instead of
    % scanning the whole pointer table once per node.
    
    parents = zeros(nNodes,1);
    
    if nNodes <= 1
        return
    end
    
    children = [pointers(:,1); pointers(:,2)];
    
    % Every node except the root must appear exactly once as a child.
    if ~isequal(sort(children), (1:nNodes-1)')
        error('identifyParents:invalidPointers', ...
            'Each of the nodes 1..%d must appear exactly once in the pointer table.', nNodes-1);
    end
    
    branchNodes = (1:size(pointers,1))' + nLeaves;
    parents(children) = [branchNodes; branchNodes];
    
end





function nRecombinations = countRecombinationsInTreeForGene(popStructure, snpPositions, totalSequenceLength, tr, homeCluster)
    % Counts the recombinations needed to explain the non-home-cluster
    % segments of one gene on the tree 'tr'.
    %
    % Steps:
    %   1. detectSegments lists all segments that are not from homeCluster,
    %      as rows [strain, first, last, origin].
    %   2. detectSegmentBlocks groups co-located segments of the same
    %      origin into blocks; blockInfo has one row [first, last, origin]
    %      per block. The block index is stored as column 5 of the list.
    %   3. identifyBlockAncestorsAlt finds ancestor nodes for each block.
    %   4. countRecombinations turns the ancestors into a count.
    %
    % Returns 0 when no segments are found.
    
    nRecombinations = 0;
    
    listOfSegments = detectSegments(popStructure, snpPositions, totalSequenceLength, homeCluster);
    if isempty(listOfSegments)
        return
    end
    
    % Different visiting orders of the segments may give different blocks;
    % the original author notes that this is not a problem in practice.
    [blockInfo, blockOfSegment] = detectSegmentBlocks(listOfSegments);
    listOfSegments(:,5) = blockOfSegment;
    
    blockAncestors = identifyBlockAncestorsAlt(blockInfo, listOfSegments, tr);
    
    nRecombinations = countRecombinations(blockInfo, blockAncestors);
    
end



function listOfSegments = detectSegments(popStructure, snpPositions, totalSequenceLength, homeCluster)
    % Collects the segments of all strains and drops those that belong to
    % the home cluster.
    %
    % Inputs:
    %   popStructure        - matrix with one row per strain.
    %   snpPositions, totalSequenceLength - passed on to identifySegments.
    %   homeCluster         - origin label whose segments are removed.
    %
    % Output:
    %   listOfSegments      - one row per remaining segment:
    %                         [strain, first, last, origin].
    %
    % NOTE(review): identifySegments is defined elsewhere; its rows are
    % assumed to be [first, last, origin] - confirm.
    %
    % The earlier version also called computeOriginLengths here, but its
    % result was only used by a check that had been commented out, so the
    % call has been removed.
    
    nStrains = size(popStructure,1);
    
    listOfSegments = zeros(10000, 4); % grows automatically if more rows are needed
    nSegmentsNow = 0;
    
    for strainIndex = 1:nStrains
        
        segments = identifySegments(popStructure, strainIndex, snpPositions, totalSequenceLength);
        nNewSegments = size(segments,1);
        
        if nNewSegments > 0
            rowsToFill = nSegmentsNow + (1:nNewSegments);
            listOfSegments(rowsToFill, :) = [repmat(strainIndex,[nNewSegments,1]), segments];
            nSegmentsNow = nSegmentsNow + nNewSegments;
        end
    end
    
    % Trim the unused preallocated rows.
    listOfSegments = listOfSegments(1:nSegmentsNow,:);
    
    % Remove home cluster segments.
    listOfSegments(listOfSegments(:,4) == homeCluster, :) = [];
    
end


% Compute the total length of the segments assigned to each origin
function originLengths = computeOriginLengths(listOfSegments)
    % Sums the segment lengths per origin.
    %
    % Input:
    %   listOfSegments - rows [strain, first, last, origin].
    % Output:
    %   originLengths  - row vector; element k is the total length
    %                    (last - first + 1) of all segments with origin k,
    %                    for k = 1..max(origin).
    
    origins = listOfSegments(:,4);
    spans = listOfSegments(:,3) - listOfSegments(:,2) + 1;
    
    highestOrigin = max(origins);
    originLengths = zeros(1,highestOrigin);
    for originIndex = 1:highestOrigin
        originLengths(originIndex) = sum(spans(origins == originIndex));
    end
end


function [blockInfo, segmentToBlock] = detectSegmentBlocks(segments, minOverlapFraction)
    % Groups co-located segments of the same origin into blocks.
    %
    % Each segment is visited in order and is either merged into an
    % existing block or starts a new one. A segment matches a block when
    % they have the same origin and their overlap covers more than
    % minOverlapFraction of the block AND of the segment.
    %
    % Inputs:
    %   segments           - rows [strain, first, last, origin].
    %   minOverlapFraction - optional, default 0.9 (the value that used to
    %                        be hard-coded).
    %
    % Outputs:
    %   blockInfo      - nBlocks-by-3, rows [first, last, origin], where
    %                    first/last span all segments merged into the block.
    %   segmentToBlock - nSegments-by-1, block index of every segment.
    %
    % An empty segment list now yields empty outputs instead of an
    % indexing error.
    %
    % Example from the original author:
    %   segments = [7 95 300 1; 10 100 305 1; 11 250 400 2; 13 600 800 3; ...
    %           16 600 800 3; 19 150 650 2; 20 93 300 1; 30 100 300 3];
    %   gives
    %   blockInfo = [93 305 1; 250 400 2; 600 800 3; 150 650 2; 100 300 3]
    %   segmentToBlock = [1 1 2 3 3 4 1 5]'
    
    if nargin < 2 || isempty(minOverlapFraction)
        minOverlapFraction = 0.9;
    end
    
    nSegments = size(segments,1);
    segmentToBlock = zeros(nSegments,1);
    
    if nSegments == 0
        blockInfo = zeros(0,3);
        return
    end
    
    % There can never be more blocks than segments.
    blockInfo = zeros(nSegments,3);
    
    % The first segment starts the first block.
    blockInfo(1,:) = segments(1,2:4);
    segmentToBlock(1) = 1;
    nBlocks = 1;
    
    for segmentIndex = 2:nSegments
        
        % Overlap (in positions) of this segment with every existing block.
        amountOverlap = computeAmountOverlap(blockInfo(1:nBlocks,:), segments(segmentIndex,:));
        
        blockLengths = blockInfo(1:nBlocks,2) - blockInfo(1:nBlocks,1) + 1;
        segmentLength = segments(segmentIndex, 3) - segments(segmentIndex, 2) + 1;
        
        % Smaller of the two coverage fractions (of the block, of the segment).
        minOverlap = min(amountOverlap ./ blockLengths, amountOverlap ./ segmentLength);
        
        if any(minOverlap > minOverlapFraction)
            
            % Merge into the best matching block; with a single candidate
            % the maximum is that candidate.
            [~, blockToAssign] = max(minOverlap);
            segmentToBlock(segmentIndex) = blockToAssign;
            blockInfo(blockToAssign, 1) = min(segments(segmentIndex,2), blockInfo(blockToAssign,1));
            blockInfo(blockToAssign, 2) = max(segments(segmentIndex,3), blockInfo(blockToAssign,2));
            
        else
            
            % Start a new block from this segment.
            nBlocks = nBlocks + 1;
            blockInfo(nBlocks, :) = segments(segmentIndex, 2:4);
            segmentToBlock(segmentIndex) = nBlocks;
            
        end
        
    end
    
    blockInfo = blockInfo(1:nBlocks,:);
    
end



function amountOverlap = computeAmountOverlap(blockInfo, segment)
    % Number of positions that a segment shares with each block.
    %
    % Inputs:
    %   blockInfo - rows [first, last, origin], one per block.
    %   segment   - [strain, first, last, origin].
    % Output:
    %   amountOverlap - column vector with one entry per block; 0 for
    %                   blocks of another origin and for blocks that do
    %                   not overlap the segment.
    %
    % Examples from the original author, with segment = [1, 6, 10, 1]:
    %   blockInfo = [2,4,1; 2,8,1; 2,14,1; 7,9,1; 8,14,1; 12,14,1]
    %       -> [0 3 5 3 3 0]
    %   blockInfo = [2,6,1; 2,8,1; 2,14,1; 7,9,1; 8,14,1; 10,14,1]
    %       -> [1 3 5 3 3 1]
    % and with segment = [1, 6, 10, 2] (other origin) -> all zeros.
    
    segStart = segment(2);
    segStop = segment(3);
    segSource = segment(4);
    
    blockStart = blockInfo(:,1);
    blockStop = blockInfo(:,2);
    
    sameSource = (blockInfo(:,3) == segSource);
    disjoint = (blockStart > segStop | blockStop < segStart);
    candidates = sameSource & ~disjoint;
    
    % The shared stretch runs from the later start to the earlier stop.
    sharedStart = max(blockStart, segStart);
    sharedStop = min(blockStop, segStop);
    
    amountOverlap = zeros(size(blockInfo,1),1);
    amountOverlap(candidates) = sharedStop(candidates) - sharedStart(candidates) + 1;
end


function blockAncestors = identifyBlockAncestors(blockInfo, listOfSegments, tr)
    % NOTE: superseded - callers use identifyBlockAncestorsAlt instead.
    %
    % For every block, finds the "terminal block nodes": nodes whose whole
    % subtree carries the block while the sibling subtree does not.
    %
    % Inputs:
    %   blockInfo      - one row per block (only the row count is used).
    %   listOfSegments - rows whose column 1 is the strain (leaf node)
    %                    index and whose column 5 is the block index.
    %   tr             - tree object queried with get for 'NumNodes',
    %                    'NumLeaves' and 'Pointers'.
    %
    % Output:
    %   blockAncestors - nBlocks-by-1 cell; cell k lists the terminal
    %                    block nodes of block k.
    %
    % A branch node is a block node when both of its children are block
    % nodes. When only one child is a block node, that child is marked as
    % terminal. Consequently, if both children of the root are block
    % nodes, nothing is marked for that clade.
    %
    % The tree properties do not depend on the block, so they are read
    % once before the loop; two locals that were never used have been
    % removed.
    
    nBlocks = size(blockInfo,1);
    blockAncestors = cell(nBlocks,1);
    
    nNodes = get(tr, 'NumNodes');
    nLeaves = get(tr, 'NumLeaves');
    pointers = get(tr,'Pointers');
    
    for blockIndex = 1:nBlocks
        
        % Leaves that carry a segment of this block.
        isBlockNode = zeros(nNodes,1);
        blockStrains = listOfSegments(listOfSegments(:,5) == blockIndex,1);
        isBlockNode(blockStrains) = 1;

        % Block nodes whose parent is not a block node.
        isTerminalBlockNode = zeros(nNodes,1);

        for nodeIndex = nLeaves+1 : nNodes
            
            % Children of this node
            ch1 = pointers(nodeIndex - nLeaves,1);
            ch2 = pointers(nodeIndex - nLeaves,2);
            
            if isBlockNode(ch1) && isBlockNode(ch2)
                isBlockNode(nodeIndex) = 1;
            elseif isBlockNode(ch1)
                isTerminalBlockNode(ch1) = 1;
            elseif isBlockNode(ch2)
                isTerminalBlockNode(ch2) = 1;
            end
            
        end
        
        blockAncestors{blockIndex} = find(isTerminalBlockNode);
    end
end



function nRecombinations = countRecombinations(blockInfo, blockAncestors)
    % Counts recombinations from the ancestor nodes of each block.
    %
    % Inputs:
    %   blockInfo      - one row per block (only the row count is used).
    %   blockAncestors - cell array; cell k lists the ancestor nodes of
    %                    block k.
    %
    % Output:
    %   nRecombinations - number of (ancestor, block) pairs that are
    %                     counted, according to these rules:
    %     * every ancestor of the first block is counted;
    %     * for a later block, its ancestors are visited in increasing
    %       node order; an ancestor is skipped when it shares an earlier
    %       block with an ancestor of the same block that was visited
    %       before it, and is counted otherwise.
    
    nBlocks = size(blockInfo,1);
    
    % All distinct ancestor nodes, in increasing order.
    pooledAncestors = zeros(0,1);
    for blockIndex = 1:nBlocks
        pooledAncestors = [pooledAncestors; reshape(blockAncestors{blockIndex}, [], 1)]; %#ok<AGROW>
    end
    distinctAncestors = unique(pooledAncestors);
    
    % hasAncestor(a,b) is true when ancestor a belongs to block b.
    hasAncestor = false(length(distinctAncestors), nBlocks);
    for blockIndex = 1:nBlocks
        hasAncestor(:, blockIndex) = ismember(distinctAncestors, blockAncestors{blockIndex});
    end
    
    nRecombinations = 0;
    for blockIndex = 1:nBlocks
        
        ancestorsHere = find(hasAncestor(:, blockIndex))';
        
        if blockIndex == 1
            nRecombinations = nRecombinations + length(ancestorsHere);
            continue
        end
        
        % Earlier blocks already seen through the ancestors visited so
        % far within this block.
        seenEarlierBlocks = false(1, blockIndex-1);
        
        for ancestorIndex = ancestorsHere
            earlierBlocksHere = hasAncestor(ancestorIndex, 1:blockIndex-1);
            if ~any(earlierBlocksHere & seenEarlierBlocks)
                nRecombinations = nRecombinations + 1;
            end
            seenEarlierBlocks = seenEarlierBlocks | earlierBlocksHere;
        end
    end
    
end



function blockAncestors = identifyBlockAncestorsAlt(blockInfo, listOfSegments, tr)
    % Identify, for every block, the tree nodes at which the block is taken
    % to have entered the tree.
    %
    % Inputs:
    %   blockInfo      - only its number of rows is used here (= number of
    %                    blocks).
    %   listOfSegments - matrix with one row per segment. Column 1 is used
    %                    as a node index into the tree (the strain carrying
    %                    the segment), column 5 as the index of the block
    %                    the segment belongs to.
    %   tr             - tree object queried through get() for 'NumNodes',
    %                    'NumLeaves' and 'Pointers'.
    %
    % Output:
    %   blockAncestors - nBlocks-by-1 cell array; cell k lists the nodes
    %                    that have block k while their parent does not.
    %
    % Side effect: writes 'isBlockNodeInSomeBlock.mat' (a mask of nodes that
    % ended up with the block in at least one block) to the current folder.
    %
    % Node status values used during the sweep:
    %   0: does not have the block
    %   1: has the block
    %   2: unclear (children disagree); resolved later by an ancestor
    
    nBlocks = size(blockInfo,1);
    nNodes = get(tr, 'NumNodes');
    nLeaves = get(tr, 'NumLeaves');
    pointers = get(tr,'Pointers');
    
    blockAncestors = cell(nBlocks,1);
    
    isBlockNodeInSomeBlock = zeros(nNodes,1);
    
    % The parent of each node depends only on the tree, so it is computed
    % once instead of once per block.
    parents = identifyParents(pointers, nLeaves, nNodes);
    
    for blockIndex = 1:nBlocks
        
        % Every node starts as "does not have the block"; the strains that
        % carry a segment of this block are then marked as having it.
        % Internal nodes get their real status in the sweep below.
        isBlockNode = zeros(nNodes,1);
        
        blockStrains = listOfSegments(listOfSegments(:,5) == blockIndex,1);
        isBlockNode(blockStrains) = 1;
        
        isBlockNode(nNodes) = 0; % Root is assumed not to have the block.
        
        % Assign status to each internal node, starting from the most
        % recent one.
        % NOTE(review): this relies on every internal node being numbered
        % after both of its children - confirm for the tree type in use.
        for nodeIndex = nLeaves + 1 : nNodes

            ch1 = pointers(nodeIndex - nLeaves,1);
            ch2 = pointers(nodeIndex - nLeaves,2);

            if nodeIndex == nNodes
                % Root has been reached, and it has status 0 always.
                
                % Clarify the status of both children, if needed.
                if isBlockNode(ch1) == 2
                    isBlockNode = clearStatus(ch1, 0, isBlockNode, pointers, nLeaves);
                end
                
                if isBlockNode(ch2) == 2
                    isBlockNode = clearStatus(ch2, 0, isBlockNode, pointers, nLeaves);
                end
                
            else
                
                % Both statuses are read before anything is modified; each
                % branch below changes at most the unclear child's subtree.
                status1 = isBlockNode(ch1);
                status2 = isBlockNode(ch2);
            
                if status1==1 && status2==1
                    % Both children are block nodes
                    isBlockNode(nodeIndex) = 1;

                elseif status1==0 && status2==0
                    % Neither child is a block node
                    isBlockNode(nodeIndex) = 0;

                elseif status1==1 && status2==2
                    % ch1 is block node, ch2 is unclear: ch2 follows ch1
                    isBlockNode(nodeIndex) = 1;
                    isBlockNode = clearStatus(ch2, 1, isBlockNode, pointers, nLeaves);

                elseif status1==2 && status2==1
                    % ch1 is unclear, ch2 is block node: ch1 follows ch2
                    isBlockNode(nodeIndex) = 1;
                    isBlockNode = clearStatus(ch1, 1, isBlockNode, pointers, nLeaves);

                elseif status1==0 && status2==2
                    % ch1 is not block node, ch2 is unclear: ch2 follows ch1
                    isBlockNode(nodeIndex) = 0;
                    isBlockNode = clearStatus(ch2, 0, isBlockNode, pointers, nLeaves);

                elseif status1==2 && status2==0
                    % ch1 is unclear, ch2 is not block node: ch1 follows ch2
                    isBlockNode(nodeIndex) = 0;
                    isBlockNode = clearStatus(ch1, 0, isBlockNode, pointers, nLeaves);

                else
                    % Either:
                    % ch1==2 && ch2==2, or
                    % ch1==0 && ch2==1, or
                    % ch1==1 && ch2==0
                    isBlockNode(nodeIndex) = 2; % Status remains unclear.

                end
            end
        end
        
        % Identify as ancestors for this block all such nodes that are
        % block nodes, but whose parents are not block nodes. The root
        % (node nNodes) is excluded from the loop.
        isBlockAncestor = zeros(nNodes,1);
        for nodeIndex = 1:nNodes-1
            nodeStatus = isBlockNode(nodeIndex);
            parentStatus = isBlockNode(parents(nodeIndex));
            
            if nodeStatus == 1 && parentStatus == 0
                isBlockAncestor(nodeIndex) = 1;
            end
        end
        blockAncestors{blockIndex} = find(isBlockAncestor);
        
        % Accumulate the nodes that have the block in at least one block.
        isBlockNodeInSomeBlock = isBlockNodeInSomeBlock | isBlockNode;
    end
    
    % NOTE(review): written to the current working folder on every call.
    save('isBlockNodeInSomeBlock.mat','isBlockNodeInSomeBlock');
end


function isBlockNode = clearStatus(nodeIndex, newStatus, isBlockNode, pointers, nLeaves)
    % Resolve an "unclear" subtree: give nodeIndex the status newStatus and
    % propagate the same status downwards to every descendant that is
    % reachable through nodes whose status is 2 (unclear).
    %
    % Inputs:
    %   nodeIndex   - internal node to start from (its children are looked
    %                 up as pointers(nodeIndex - nLeaves, 1:2)).
    %   newStatus   - status value to assign.
    %   isBlockNode - status vector indexed by node.
    %   pointers    - one row per internal node holding its two children.
    %   nLeaves     - number of leaves; internal nodes are numbered after
    %                 the leaves.
    %
    % Output:
    %   isBlockNode - the updated status vector. Children whose status is
    %                 not 2 are left untouched and not descended into.
    %
    % An explicit stack is used instead of recursion so that long chains of
    % unclear nodes (deep, unbalanced trees) cannot exhaust MATLAB's
    % recursion limit.
    
    pending = nodeIndex;
    
    while ~isempty(pending)
        current = pending(end);
        pending(end) = [];
        
        isBlockNode(current) = newStatus;
        
        children = pointers(current - nLeaves, 1:2);
        for child = children
            if isBlockNode(child) == 2
                pending(end+1) = child; %#ok<AGROW>
            end
        end
    end
    
end




function nRec = countRecombinationsFromAncestralStructures(ancestralStructures, tree)
    % Count recombination events over all branches of the tree by comparing
    % the structure of every internal node with that of each of its two
    % children.
    %
    % Inputs:
    %   ancestralStructures - matrix with one row per tree node (the row
    %                         count must equal the number of nodes).
    %                         Presumably one column per site - confirm
    %                         against the caller.
    %   tree                - tree object queried through get() for
    %                         'NumNodes', 'NumLeaves' and 'Pointers'.
    %
    % Output:
    %   nRec - total number of recombinations counted. When the function is
    %          called with no arguments, nRec is instead a struct of
    %          handles to the local helper functions (used for testing).
    %
    % Raises an error when the number of rows in ancestralStructures does
    % not match the number of nodes in the tree.
    
    if nargin == 0
        % Just return handles to all functions for testing.
        nRec.identifyRecombinationSegments = @identifyRecombinationSegments;
        nRec.determineTheNumberOfRecombinationsFromPotentialDonors = @determineTheNumberOfRecombinationsFromPotentialDonors;
        nRec.identifyPotentialDonors = @identifyPotentialDonors;
        return
    end
    
    nNodes = get(tree, 'NumNodes');
    nLeaves = get(tree, 'NumLeaves');
    pointers = get(tree, 'Pointers');
    
    if size(ancestralStructures,1) ~= nNodes
        error('Incorrect number of ancestral sequences');
    end
    
    nRec = 0;
    
    % Go through each internal node, starting from the root (the last
    % node), and look at the branch to each of its two children.
    for nodeIndex = nNodes : -1 : nLeaves+1
        
        nodeStructure = ancestralStructures(nodeIndex,:);
        
        for childIndex = 1:2
            
            childNow = pointers(nodeIndex - nLeaves, childIndex);
            childStructure = ancestralStructures(childNow, :);
            
            segments = identifyRecombinationSegments(nodeStructure, childStructure);
            
            if isempty(segments)
                % Parent and child agree everywhere: nothing to count.
                continue
            end
            
            if size(segments,1) > 1
                % Multiple segments have appeared in the branch
                % simultaneously. Check if this combination can be seen
                % in some older branch, which could have donated it;
                % segments sharing a donor are counted only once.
                potentialDonors = identifyPotentialDonors(segments, childNow, childStructure, ancestralStructures);
                nRec = nRec + determineTheNumberOfRecombinationsFromPotentialDonors(potentialDonors);
            else
                % There is one segment. No need to identify a donor, as
                % this is anyway counted as another recombination.
                nRec = nRec + 1;
            end
            
        end
        
    end
    
end




function segments = identifyRecombinationSegments(nodeStructure, childStructure)
    % List the contiguous stretches in which the child differs from its
    % parent node.
    %
    % Inputs:
    %   nodeStructure  - row vector of values for the parent node.
    %   childStructure - row vector of values for the child, same length.
    %
    % Output:
    %   segments - one row per segment: [first last origin], where first
    %              and last are the positions delimiting the segment and
    %              origin is the child's value at the first position.
    %              A stretch of differing positions is split wherever the
    %              child's value changes. Returns [] when parent and child
    %              agree everywhere.
    
    delta = nodeStructure - childStructure;
    differs = (delta ~= 0);
    
    if ~any(differs)
        % No recombinations here.
        segments = [];
        return
    end
    
    nSites = length(differs);
    
    % A cut with value k lies between positions k and k+1.
    % First kind: the child switches between "same as parent" and
    % "different from parent".
    statusCuts = find(differs(2:end) ~= differs(1:end-1));
    
    % Second kind: two neighbouring differing positions carry different
    % values in the child, i.e. the origin changes inside a stretch.
    % Positions equal to the parent are blanked with NaN so that they never
    % produce a cut of this kind.
    maskedChild = childStructure;
    maskedChild(~differs) = nan;
    step = maskedChild(2:end) - maskedChild(1:end-1);
    originCuts = find(~isnan(step) & step ~= 0);
    
    cuts = union(statusCuts, originCuts);
    
    % Close off stretches that touch either end of the sequence.
    if differs(1)
        cuts = [0 cuts];
    end
    if differs(end)
        cuts = [cuts nSites];
    end
    
    % A cut opens a segment when the position right after it differs from
    % the parent. The extra trailing entry keeps the lookup in range for
    % the cut at nSites.
    paddedDiffers = [differs false];
    openers = find(paddedDiffers(cuts + 1));
    nSegments = numel(openers);
    
    % Every segment runs from just after its opening cut up to the next cut.
    firstSites = cuts(openers) + 1;
    lastSites = cuts(openers + 1);
    origins = childStructure(firstSites);
    
    segments = zeros(nSegments, 3);
    segments(:,1) = firstSites(:);
    segments(:,2) = lastSites(:);
    segments(:,3) = origins(:);
    
end


function possibleDonorsForSegments = identifyPotentialDonors(segments, childIndex, childStructure, ancestralStructures)
    % For a set of segments that appeared on one branch, look for rows of
    % ancestralStructures (after row childIndex) that already carry a run
    % of two or more consecutive segments exactly as the child has them.
    %
    % Inputs:
    %   segments            - one row per segment; column 1 is the first
    %                         site and column 2 the last site.
    %   childIndex          - row of the child in ancestralStructures; only
    %                         rows childIndex+1 .. end are candidates.
    %   childStructure      - row vector of the child's values.
    %   ancestralStructures - matrix with one row per node.
    %
    % Output:
    %   possibleDonorsForSegments - row vector with one entry per segment:
    %       the row index of the donor assigned to the segment, or 0 when
    %       no donor was assigned.
    %
    % Search order: the longest run of segments first, and for a given run
    % length the leftmost start first. The first run that has a matching
    % row wins; the segments left and right of that run are then searched
    % again on their own, but only when at least two remain on that side
    % (a lone segment is one recombination whatever its donor).
    
    nSegments = size(segments, 1);
    possibleDonorsForSegments = zeros(1, nSegments);
    
    for runLength = nSegments : -1 : 2
        for runStart = 1 : (nSegments - runLength + 1)
            
            runEnd = runStart + runLength - 1;
            
            % The pattern spans from the first site of the first segment to
            % the last site of the last segment in the run, including any
            % sites between the segments.
            siteRange = segments(runStart, 1) : segments(runEnd, 2);
            pattern = childStructure(siteRange);
            
            % Same site range in every candidate row.
            candidates = ancestralStructures((childIndex + 1):end, siteRange);
            matchesPattern = all(bsxfun(@eq, candidates, pattern), 2);
            
            donorOffset = find(matchesPattern, 1, 'first');
            if isempty(donorOffset)
                continue
            end
            
            % First matching row, converted back to an index into
            % ancestralStructures.
            possibleDonorsForSegments(runStart:runEnd) = donorOffset + childIndex;
            
            leftSide = 1 : (runStart - 1);
            if length(leftSide) > 1
                possibleDonorsForSegments(leftSide) = identifyPotentialDonors( ...
                    segments(leftSide,:), childIndex, childStructure, ancestralStructures);
            end
            
            rightSide = (runEnd + 1) : nSegments;
            if length(rightSide) > 1
                possibleDonorsForSegments(rightSide) = identifyPotentialDonors( ...
                    segments(rightSide,:), childIndex, childStructure, ancestralStructures);
            end
            
            return
        end
    end
    
end


function nRec = determineTheNumberOfRecombinationsFromPotentialDonors(potentialDonors)
    % Count recombinations given the donor assigned to each segment.
    %
    % Input:
    %   potentialDonors - row vector with one entry per segment; 0 means
    %                     that no donor was assigned to the segment.
    %
    % Output:
    %   nRec - number of recombinations:
    %          * a run of consecutive segments sharing the same non-zero
    %            donor counts as one recombination;
    %          * every segment without a donor (0) counts as its own
    %            recombination, even when several zeros are adjacent.
    
    current = potentialDonors(2:end);
    previous = potentialDonors(1:end-1);
    
    % A segment merely continues the previous recombination when it has a
    % donor and that donor is the one of the segment just before it.
    continuesPreviousRun = (current == previous) & (current ~= 0);
    
    nRec = length(potentialDonors) - sum(continuesPreviousRun);
    
end
