function uData = clearNonSignificantAncestralRecombinations(outputFile, type)
% Remove non-significant ancestral recombinations from a structure matrix.
%
% Loads the struct "uData" from the MAT-file "outputFile", re-evaluates the
% segments of every lineage (or cluster) and writes the cleaned structure
% back into "uData", which is then saved to the same file.
%
% Inputs:
%   outputFile - name of a MAT-file that contains the variable "uData".
%   type       - If type == 1, clean uData.lineageStructure (rows are
%                indexed by uData.groupedPartition). For any other value
%                uData.clusterStructure is processed (rows indexed by
%                uData.partition), but the result is stored only when
%                type == 2.
%
% Output:
%   uData - the loaded struct, extended with either
%           "lineageStructureCleaned" and "lineageAncestralLogBf"
%           (type == 1) or "clusterStructureCleaned" and
%           "clusterAncestralLogBf" (type == 2).

    % Load only "uData", and load it into a struct: a bare load() would
    % inject every variable of the file into this workspace and could
    % silently overwrite "type" or "outputFile".
    loaded = load(outputFile, 'uData');
    if ~isfield(loaded, 'uData')
        error('clearNonSignificantAncestralRecombinations:missingUData', ...
            'File "%s" does not contain the variable "uData".', outputFile);
    end
    uData = loaded.uData;

    if type == 1
        forLineages = true;
        ancestralSeqTable = createAncestralSequences(uData, forLineages); % Lineage ancestral sequences
        nLineages = max(uData.groupedPartition);
        lineageStructure = uData.lineageStructure;
    else
        forLineages = false;
        ancestralSeqTable = createAncestralSequences(uData, forLineages); % Cluster ancestral sequences
        nLineages = max(uData.partition);
        lineageStructure = uData.clusterStructure;
    end

    structureCleaned = uint8(lineageStructure);
    ancestralRecombinationSignificances = cell(1, nLineages);

    % Compute the SNP differences for all lineage pairs. Only the upper
    % triangle (first index < second index) is filled.
    snpDifferencesBetweenLineages = cell(nLineages, nLineages);
    for lineageIndex1 = 1 : nLineages-1
        for lineageIndex2 = lineageIndex1+1 : nLineages
            snpDifferencesBetweenLineages{lineageIndex1, lineageIndex2} = ...
                formulateSnpDifferenceData(ancestralSeqTable, lineageIndex1, lineageIndex2, ...
                uData.totalSequenceLength, uData.snpPositions);
        end
    end

    for lineageIndex = 1:nLineages

        significanceResults = clearNonSignificantAncestralRecombinationsInLineage( ...
            snpDifferencesBetweenLineages, lineageIndex, lineageStructure, ...
            uData.snpPositions, uData.totalSequenceLength, ...
            uData.ancestralLogBfThreshold, uData.priorCounts);

        % If there is only one segment remaining after merging segments
        % with each other based on non-significant differences in SNP
        % density, then make sure that this one segment belongs to the home
        % lineage.
        if size(significanceResults.segments, 1) == 1
            significanceResults.segments(3) = lineageIndex;
        end

        structureCleaned(lineageIndex, :) = inferLineageStructure(uData.snpPositions, ...
            significanceResults.segments, lineageIndex, ...
            significanceResults.logBfForKeepingSeparate, uData.ancestralLogBfThreshold);

        % Keep the log Bayes factors of the recombinations that remain,
        % i.e. segments of foreign origin whose log BF exceeds the threshold.
        isSignificantRecombination = significanceResults.segments(:,3) ~= lineageIndex & ...
            significanceResults.logBfForKeepingSeparate > uData.ancestralLogBfThreshold;
        ancestralRecombinationSignificances{lineageIndex} = ...
            significanceResults.logBfForKeepingSeparate(isSignificantRecombination);

    end

    if type == 1
        uData.lineageStructureCleaned = structureCleaned;
        uData.lineageAncestralLogBf = ancestralRecombinationSignificances;
    elseif type == 2
        uData.clusterStructureCleaned = structureCleaned;
        uData.clusterAncestralLogBf = ancestralRecombinationSignificances;
    end

    save(outputFile, 'uData');

end


function lineageStructure = inferLineageStructure(snpPositions, segments, lineageIndex, logBfForKeepingSeparate, ancestralLogBfThreshold)
% Translate a segment table into a per-SNP origin vector for one lineage.
%
% "segments" has three columns: first, last, and origin. The result is a
% 1-by-length(snpPositions) uint8 row holding, for each SNP position, the
% origin of the segment that covers it.

    % Recombinations whose log BF stays below the threshold could not be
    % merged with the home origin by the algorithm (there was too little of
    % the home origin), so they are assigned to the home lineage here.
    isForeign = segments(:,3) ~= lineageIndex;
    isBelowThreshold = logBfForKeepingSeparate < ancestralLogBfThreshold;
    segments(isForeign & isBelowThreshold, 3) = lineageIndex;

    lineageStructure = zeros(1, length(snpPositions), 'uint8');
    for segmentRow = segments'
        % segmentRow is [first; last; origin] of one segment
        coveredSnps = snpPositions >= segmentRow(1) & snpPositions <= segmentRow(2);
        lineageStructure(coveredSnps) = segmentRow(3);
    end

    % Every SNP position must have been covered by some segment.
    if ~all(lineageStructure)
        error('Should not happen');
    end

end


function significanceResults = clearNonSignificantAncestralRecombinationsInLineage(snpDifferencesBetweenLineages, ...
    lineageIndex, lineageStructure, snpPositions, totalSequenceLength, ancestralLogBfThreshold, priorCounts)
    % Iteratively merge the segments of one lineage whose log Bayes factor
    % for being kept separate is below "ancestralLogBfThreshold".
    %
    % Returns a struct with the fields
    %   segments                - remaining segments (first, last, origin)
    %   neighborToJoinWith      - proposed merge direction per segment
    %                             (0 when only one segment exists)
    %   logBfForKeepingSeparate - log BF per segment (nan when only one
    %                             segment exists)
    
    % Identify ancestral segments detected in this lineage by a HMM
    segments = identifySegments(lineageStructure, lineageIndex, snpPositions, totalSequenceLength);
    % "segments" has three columns: first, last, and origin
    nSegments = size(segments,1);
    
    if nSegments == 1
        
        % Nothing to merge: report the single segment as it is.
        significanceResults.segments = segments;
        significanceResults.neighborToJoinWith = 0;
        significanceResults.logBfForKeepingSeparate = nan;
        
    else
        
        segmentLengths = segments(:,2) - segments(:,1) + 1;
        
        % Compute significances for all the segments
        
        logBfForKeepingSeparate = zeros(nSegments,1);
        % This is the logBf for joining a home segment with its
        % neighbor, or the more favourable of its two neighbors, if two 
        % neighbors exist. For segments that are recombinations, this is
        % the logBF for merging the segment with the home lineage.
        
        neighborToJoinWith = zeros(nSegments,1);
        % 0 if both neighbors are the same (or there is only one
        % neighbor), 1 if left neighbor is better, 2 if right neighbor is
        % better, -1 if the segment is a recombination and can only be
        % joined with the home lineage
        
        for segmentIndex = 1:nSegments
            
            logBfResults = computeSignificances(snpDifferencesBetweenLineages, segments, segmentIndex, priorCounts, lineageIndex);
            % Note: logBfResults may be a scalar or 1*2 array.
            % Note 2: "computeSignificances" computes significances
            % differently for home lineage segments and recombinant
            % segments.
            
            % neighborToJoinWith should be -1 for recombinant segments, to
            % indicate that these can be joined only with the home lineage.
            if segments(segmentIndex,3) ~= lineageIndex
                
                logBfForKeepingSeparate(segmentIndex) = logBfResults;
                neighborToJoinWith(segmentIndex) = -1;
                
            else
                
                [minValue, minIndex] = min(logBfResults);
                
                if length(logBfResults) == 1
                    % One alternative origin only: a single neighbor, or
                    % two neighbors sharing the same origin.
                    logBfForKeepingSeparate(segmentIndex) = minValue;
                    neighborToJoinWith(segmentIndex) = 0;
                    
                elseif length(logBfResults) == 2
                    
                    % By default, propose joining with the neighbor that 
                    % has smaller logBF. However, if that neighbor is 
                    % shorter than the segment, the other neighbor is at
                    % least as long as the segment, and the other neighbor
                    % also has logBf below the threshold, then propose
                    % joining with that other neighbor.
                    % (3-minIndex maps 1 -> 2 and 2 -> 1, i.e. it selects
                    % the other neighbor.)
                    
                    neighborLengths = segmentLengths([segmentIndex-1 segmentIndex+1]);
                    if segmentLengths(segmentIndex) > neighborLengths(minIndex) ...
                        && segmentLengths(segmentIndex) <= neighborLengths(3-minIndex) ...
                        && logBfResults(3-minIndex) < ancestralLogBfThreshold
                        
                        neighborToJoinWith(segmentIndex) = 3-minIndex;
                        logBfForKeepingSeparate(segmentIndex) = logBfResults(3-minIndex);
                        
                    else
                        
                        neighborToJoinWith(segmentIndex) = minIndex;
                        logBfForKeepingSeparate(segmentIndex) = minValue;
                        
                    end
                    
                end
                
            end
        end
        
        % A segment that has the smallest log BF, and also less than the
        % threshold, will be merged with its neighbors. However, a segment 
        % may be merged with its neighbors only if the total length of the 
        % neighbors to which it will be merged is at least the length 
        % of the segment itself. (applies to both the home segments as well
        % as the recombinations)
        indToMerge = determineIndexToMergeWithNeighbors(segments, logBfForKeepingSeparate, neighborToJoinWith, ancestralLogBfThreshold);
        % Returns nan, if none of the segments satisfies the conditions.
        
        while ~isnan(indToMerge)
            
            
            %disp(['Merging ' num2str(indToMerge) ' with ' num2str(neighborToJoinWith(indToMerge)) '.']);
            %disp([' Segment ' num2str(indToMerge) ' removed.']);
            
            segments = mergeSegmentWithNeighbors(segments, indToMerge, neighborToJoinWith(indToMerge), lineageIndex);
            
            % Recompute BFs for the newly emerged segment and its
            % neighbors.
            % In inputs, "logBfForKeepingSeparate" and "neighborToJoinWith"
            % correspond to values before the merge, while "segments" has
            % already been updated.
            
            
            [neighborToJoinWith, logBfForKeepingSeparate] = updateSignificances(snpDifferencesBetweenLineages, segments, indToMerge, neighborToJoinWith, logBfForKeepingSeparate, priorCounts, lineageIndex, ancestralLogBfThreshold);
            
            % Look for the next segment to merge; the loop ends when no
            % segment qualifies any more.
            indToMerge = determineIndexToMergeWithNeighbors(segments, logBfForKeepingSeparate, neighborToJoinWith, ancestralLogBfThreshold);
            
        end
        significanceResults.segments = segments;
        significanceResults.neighborToJoinWith = neighborToJoinWith;
        significanceResults.logBfForKeepingSeparate = logBfForKeepingSeparate;
    end
end



function [neighborToJoinWith, logBfForKeepingSeparate] = updateSignificances(snpDifferencesBetweenLineages, segments, joinedIndex, neighborToJoinWith, logBfForKeepingSeparate, priorCounts, lineageIndex, ancestralLogBfThreshold)
    % Bring "neighborToJoinWith" and "logBfForKeepingSeparate" up to date
    % after one merge.
    %
    % Note that "segments" is already updated, i.e., segment with index 
    % "joinedIndex" has already been merged with its neighbor, but
    % "logBfForKeepingSeparate" and "neighborToJoinWith" are still in their
    % values before merging. Also "joinedIndex" refers to the index of the
    % merged segment before merging.
    %
    % The entries of the segments that disappeared are removed, and the
    % values of the newly formed segment, of its direct neighbors and of
    % all recombinant segments are recomputed.
    %
    % Errors raised by "computeSignificances" propagate to the caller.
    
    nSegmentsBeforeMerging = length(logBfForKeepingSeparate);
    
    % "toRemove": entries (old indexing) that no longer exist.
    % "newSegmentIndex": index of the newly formed segment. It is always
    % smaller than every index in "toRemove", so it is valid both before
    % and after the removal.
    switch neighborToJoinWith(joinedIndex)
        case 1
            % A home segment "joinedIndex" has been merged with its left
            % neighbor
            toRemove = joinedIndex;
            newSegmentIndex = joinedIndex - 1;
            
        case 2
            % A home segment "joinedIndex" has been merged with its right 
            % neighbor
            toRemove = joinedIndex + 1;
            newSegmentIndex = joinedIndex;
            
        case 0
            % Home segment has been merged with its only neighbor lineage
            if joinedIndex == 1
                % First segment has been merged with the second segment
                toRemove = 2;
                newSegmentIndex = 1;
                
            elseif joinedIndex == nSegmentsBeforeMerging
                % Two last segments have been merged
                toRemove = nSegmentsBeforeMerging;
                newSegmentIndex = nSegmentsBeforeMerging - 1;
                
            else
                % A segment has been merged with its two neighbors.
                toRemove = [joinedIndex joinedIndex+1];
                newSegmentIndex = joinedIndex - 1;
                
            end
            
        case -1
            % The merged segment was a recombination that was merged with
            % the home lineage. A neighbor is a home segment when its code
            % is not -1.
            hasHomeLeft = joinedIndex > 1 && ...
                neighborToJoinWith(joinedIndex-1) ~= -1;
            hasHomeRight = joinedIndex < nSegmentsBeforeMerging && ...
                neighborToJoinWith(joinedIndex+1) ~= -1;
            
            if hasHomeLeft && hasHomeRight
                % A recombinant segment was merged with its both neighbors
                toRemove = [joinedIndex joinedIndex+1];
                newSegmentIndex = joinedIndex - 1;
                
            elseif hasHomeLeft
                % A recombinant segment was merged with its left neighbor
                toRemove = joinedIndex;
                newSegmentIndex = joinedIndex - 1;
                
            elseif hasHomeRight
                % A recombinant segment was merged with its right neighbor
                toRemove = joinedIndex + 1;
                newSegmentIndex = joinedIndex;
                
            else
                % Neither of the neighbors of the recombinant segment was
                % merged with it, because neither of them belonged to the
                % home lineage, with which recombinations are only allowed
                % to be merged. The segment itself became a home segment.
                toRemove = [];
                newSegmentIndex = joinedIndex;
                
            end
            
        otherwise
            % Previously this fell through and failed later with an
            % "undefined variable" error.
            error('updateSignificances:unexpectedNeighborCode', ...
                'Unexpected neighborToJoinWith value %g for merged segment %d.', ...
                neighborToJoinWith(joinedIndex), joinedIndex);
    end
    
    logBfForKeepingSeparate(toRemove) = [];
    logBfForKeepingSeparate(newSegmentIndex) = nan;
    neighborToJoinWith(toRemove) = [];
    neighborToJoinWith(newSegmentIndex) = nan;
    
    nSegments = size(segments,1);
    
    if nSegments == 1
        % Everything has collapsed into one segment: nothing to recompute.
        segmentsToRecompute = [];
        logBfForKeepingSeparate = nan;
        neighborToJoinWith = 0;
    else
        
        % The new segment and its direct neighbors
        if newSegmentIndex == nSegments
            segmentsToRecompute = newSegmentIndex + [-1 0];
            
        elseif newSegmentIndex == 1
            segmentsToRecompute = newSegmentIndex + [0 1];
            
        else
            segmentsToRecompute = newSegmentIndex + [-1 0 1];
            
        end
        
        % After any change in the non-recombinant parts, all recombinations
        % must always be re-computed.
        recombinationSegments = find(segments(:,3) ~= lineageIndex)';
        segmentsToRecompute = unique([segmentsToRecompute, recombinationSegments]);
        
    end
    
    segmentLengths = segments(:,2) - segments(:,1) + 1;
    
    for segmentIndexNow = segmentsToRecompute
        
        logBfResults = computeSignificances(snpDifferencesBetweenLineages, segments, segmentIndexNow, priorCounts, lineageIndex);
        % Note: logBfResults may be a scalar or 1*2 array.
        
        if segments(segmentIndexNow,3) ~= lineageIndex
            % A segment is a recombination that can be joined only with the
            % home cluster. Note that "segments" stores information of 
            % the segments after merging has already taken place.
            
            logBfForKeepingSeparate(segmentIndexNow) = logBfResults;
            neighborToJoinWith(segmentIndexNow) = -1;
            
        else
            % A segment is a home segment
            
            [minValue, minIndex] = min(logBfResults);
            
            if length(logBfResults) == 1
                logBfForKeepingSeparate(segmentIndexNow) = minValue;
                neighborToJoinWith(segmentIndexNow) = 0;
                
            elseif length(logBfResults) == 2
                
                % By default, propose joining with the neighbor that 
                % has smaller logBF. However, if that neighbor is 
                % shorter than the segment, the other neighbor is at least
                % as long as the segment, and the other neighbor also has
                % logBf below the threshold, then propose joining with
                % that other neighbor (3-minIndex selects it).
                
                neighborLengths = segmentLengths([segmentIndexNow-1 segmentIndexNow+1]);
                if segmentLengths(segmentIndexNow) > neighborLengths(minIndex) ...
                        && segmentLengths(segmentIndexNow) <= neighborLengths(3-minIndex) ...
                        && logBfResults(3-minIndex) < ancestralLogBfThreshold
                    
                    neighborToJoinWith(segmentIndexNow) = 3-minIndex;
                    logBfForKeepingSeparate(segmentIndexNow) = logBfResults(3-minIndex);
                    
                else
                    
                    neighborToJoinWith(segmentIndexNow) = minIndex;
                    logBfForKeepingSeparate(segmentIndexNow) = minValue;
                    
                end
                
            end
        end
    end
end



function indToMerge = determineIndexToMergeWithNeighbors(segments, logBfForKeepingSeparate, neighborToJoinWith, logBfThreshold)
    % Pick the segment that should be merged next, or nan if there is none.
    %
    % Segments are examined in ascending order of their log BF. The first
    % one whose log BF is below "logBfThreshold" and whose merge partner(s)
    % are together at least as long as the segment itself is returned.
    % With a single segment the result is always nan.
    
    indToMerge = nan;
    nSegments = size(segments,1);
    lengths = segments(:,2) - segments(:,1) + 1;
    
    % Recombinations carry the code -1, because they can be merged only
    % with the home lineage; everything else counts as a home segment.
    isHomeSegment = neighborToJoinWith ~= -1;
    
    % Most promising (smallest log BF) candidates first.
    [~, ranking] = sort(logBfForKeepingSeparate);
    
    if nSegments <= 1
        return
    end
    
    for position = 1:nSegments
        candidate = ranking(position);
        
        % Total length of what the candidate would be merged with
        switch neighborToJoinWith(candidate)
            case 0
                % Home segment, to be joined with its only neighbor origin
                if candidate == 1
                    partnerLength = lengths(2);
                elseif candidate == nSegments
                    partnerLength = lengths(end-1);
                else
                    partnerLength = sum(lengths([candidate-1 candidate+1]));
                end
            case 1
                % Home segment, to be joined with its left neighbor
                partnerLength = lengths(candidate-1);
            case 2
                % Home segment, to be joined with its right neighbor
                partnerLength = lengths(candidate+1);
            case -1
                % Recombination: can only be joined with the home lineage
                partnerLength = sum(lengths(isHomeSegment));
        end
        
        if logBfForKeepingSeparate(candidate) < logBfThreshold && ...
                partnerLength >= lengths(candidate)
            % A suitable segment was found
            indToMerge = candidate;
            return
        end
    end
end



function segments = mergeSegmentWithNeighbors(segments, segmentIndex, neighborToJoinWith, lineageIndex)
    % Merge the segment "segmentIndex" into its neighbor(s) and return the
    % updated segment table.
    %
    % "segments" has three columns: first, last, and origin
    %
    % "neighborToJoinWith" selects the merge partner:
    %    1 : the left neighbor
    %    2 : the right neighbor
    %    0 : the only neighbor (segment is first or last), or both
    %        neighbors
    %   -1 : the segment is a recombination and is joined with the home
    %        lineage; whichever neighbors have origin "lineageIndex" are
    %        used. If neither has, only the origin of the segment is
    %        changed to "lineageIndex".
    %
    % A merged segment takes over the origin of its (first) partner.
    
    nSegments = size(segments,1);
    leftIndex = segmentIndex - 1;
    rightIndex = segmentIndex + 1;
    
    % Step 1: which rows does the segment get merged with?
    switch neighborToJoinWith
        case 1
            partners = leftIndex;
        case 2
            partners = rightIndex;
        case 0
            if segmentIndex == 1
                partners = 2;
            elseif segmentIndex == nSegments
                partners = nSegments - 1;
            else
                partners = [leftIndex rightIndex];
            end
        case -1
            partners = [];
            if segmentIndex > 1 && segments(leftIndex,3) == lineageIndex
                partners = [partners leftIndex];
            end
            if segmentIndex < nSegments && segments(rightIndex,3) == lineageIndex
                partners = [partners rightIndex];
            end
    end
    
    % Step 2: stretch the partner over the merged range and drop the rows
    % that have become redundant.
    switch numel(partners)
        case 0
            % Recombination without any home neighbor: relabel it only.
            segments(segmentIndex,3) = lineageIndex;
            
        case 1
            if partners == leftIndex
                segments(partners,2) = segments(segmentIndex,2);
            elseif partners == rightIndex
                segments(partners,1) = segments(segmentIndex,1);
            end
            segments(segmentIndex,:) = [];
            
        case 2
            % Left partner is extended to the end of the right partner.
            segments(partners(1),2) = segments(partners(2),2);
            segments([segmentIndex partners(2)],:) = [];
    end
    
end



function logBfForKeepingSeparate = computeSignificances(snpDifferencesBetweenLineages, segments, segmentIndex, priorCounts, lineageIndex)
    % Log Bayes factor(s) for keeping the segment "segmentIndex" separate.
    %
    % For a recombinant segment (origin differs from "lineageIndex") the
    % result is a scalar: keeping it separate vs. merging it with the home
    % cluster. For a home segment the result is a scalar when there is one
    % neighbor or both neighbors share the same origin, and a 1*2 array
    % [left right] when the two neighbors have different origins.
    
    ownOrigin = segments(segmentIndex, 3);
    nSegments = size(segments,1);
    if nSegments == 1
        error('Number of segments is one');
    end
    
    if ownOrigin ~= lineageIndex
        % The segment is a recombination: test it against the home cluster.
        logBfForKeepingSeparate = computeLogBfForDiscardingRecombination(snpDifferencesBetweenLineages, lineageIndex, segments, segmentIndex, priorCounts);
        return
    end
    
    % The segment belongs to the home cluster: test it against its
    % neighbor(s).
    hasLeftNeighbor = segmentIndex ~= 1;
    hasRightNeighbor = segmentIndex ~= nSegments;
    
    if ~hasLeftNeighbor
        neighbors = segmentIndex + 1;
        neighborOrigins = segments(neighbors, 3);
        
    elseif ~hasRightNeighbor
        neighbors = segmentIndex - 1;
        neighborOrigins = segments(neighbors, 3);
        
    else
        neighbors = [segmentIndex-1 segmentIndex+1];
        neighborOrigins = [segments(neighbors(1),3) segments(neighbors(2),3)];
        if neighborOrigins(1) == neighborOrigins(2)
            % Same origin on both sides: the two neighbors are pooled.
            neighborOrigins = neighborOrigins(1);
        end
        % Otherwise each neighbor must be considered separately, because
        % they have different origins.
    end
    
    if isscalar(neighborOrigins)
        logBfForKeepingSeparate = computeLogBfFromCounts(snpDifferencesBetweenLineages, ownOrigin, neighborOrigins, segments, segmentIndex, neighbors, priorCounts);
    else
        % Two possible alternative origins (combine with left or right)
        logBfForKeepingSeparate = zeros(1,2);
        for side = 1:2
            logBfForKeepingSeparate(side) = computeLogBfFromCounts(snpDifferencesBetweenLineages, ownOrigin, neighborOrigins(side), segments, segmentIndex, neighbors(side), priorCounts);
        end
    end
end



function logBfForKeepingSeparate = computeLogBfForDiscardingRecombination(snpDifferencesBetweenLineages, homeLineageIndex, segments, segmentIndex, priorCounts)
    % Log BF for keeping the recombinant segment "segmentIndex" separate
    % instead of merging it with the home cluster.
    %
    % The SNP counts of the segment are compared with the pooled SNP counts
    % of all segments whose origin is "homeLineageIndex".
    
    donorOrigin = segments(segmentIndex,3);
    if donorOrigin == homeLineageIndex
        error('incorrect recombination');
    end
    
    % SNPs between the donor lineage and the home lineage
    pairData = getSnpDifferenceData(snpDifferencesBetweenLineages, donorOrigin, homeLineageIndex);
    
    % [n1, n0] = SNP and non-SNP counts within the recombinant segment
    recombinationCounts = getSnpCounts(pairData, segments, segmentIndex);
    
    % The same counts summed over every home segment
    homeRows = find(segments(:,3) == homeLineageIndex)';
    pooledHomeCounts = sum(getSnpCounts(pairData, segments, homeRows), 1);
    
    logBfForKeepingSeparate = computeBinomialLogml([recombinationCounts; pooledHomeCounts], priorCounts);
    
end



function logBfForKeepingSeparate = computeLogBfFromCounts(snpDifferencesBetweenLineages, currentOrigin, altOrigin, segments, segmentIndex, neighborSegmentsToConsider, priorCounts)
    % Log BF for keeping the segment "segmentIndex" separate from the
    % neighbor segment(s) "neighborSegmentsToConsider", based on the SNPs
    % between the lineages "currentOrigin" and "altOrigin".
    
    pairData = getSnpDifferenceData(snpDifferencesBetweenLineages, currentOrigin, altOrigin);
    
    % [n1, n0] = counts of SNPs and non-SNPs in the regions that are tried
    % to be combined; the neighbors are pooled into a single row.
    ownCounts = getSnpCounts(pairData, segments, segmentIndex);
    pooledNeighborCounts = sum(getSnpCounts(pairData, segments, neighborSegmentsToConsider), 1);
    
    logBfForKeepingSeparate = computeBinomialLogml([ownCounts; pooledNeighborCounts], priorCounts);
end



function data = formulateSnpDifferenceData(ancestralSeqTable, currentOrigin, altOrigin, totalSequenceLength, snpPositions)
    % Collect the positions at which two ancestral sequences differ.
    %
    % "data" has two fields: "totalSequenceLength" and
    % "snpPositions", where the "snpPositions" now tells
    % the locations of the SNPs between the lineages.
    %
    % "ancestralSeqTable" contains ancestral sequences, one row per
    % lineage. If some element is equal to 9, then it means that at this
    % column every strain in the lineage has a recent recombination, and,
    % so, nothing is known of the ancestral sequences. A between-lineage
    % SNP will not be marked at these columns.
    
    seqCurrent = ancestralSeqTable(currentOrigin, :);
    seqAlt = ancestralSeqTable(altOrigin, :);
    
    % A column is NOT a SNP when both sequences agree, or when either of
    % them is unknown (9). Comparing the two rows directly replaces the
    % earlier per-column num2cell/cellfun/unique pass and yields the same
    % mask.
    notSnp = (seqCurrent == seqAlt) | (seqCurrent == 9) | (seqAlt == 9);
    
    data.totalSequenceLength = totalSequenceLength;
    data.snpPositions = snpPositions;
    data.snpPositions(notSnp) = []; % Remove positions not SNPs between ancestral sequences.
end


function data = getSnpDifferenceData(snpDifferencesBetweenLineages, currentOrigin, altOrigin)
    % Look up the stored SNP difference data of a pair of lineages.
    %
    % "snpDifferencesBetweenLineages" is a cell array that is read at
    % {smaller index, larger index}, so the order of "currentOrigin" and
    % "altOrigin" does not matter.
    
    if currentOrigin <= altOrigin
        data = snpDifferencesBetweenLineages{currentOrigin, altOrigin};
    else
        data = snpDifferencesBetweenLineages{altOrigin, currentOrigin};
    end
end



function counts = getSnpCounts(data, segments, segmentIndices)
    % Count SNPs and non-SNPs inside the requested segments.
    %
    % "data" has fields "snpPositions" and "totalSequenceLength" (only
    % "snpPositions" is used here).
    % 
    % "segments" has three columns, "first", "last", "origin" (the last one
    % is not needed here)
    % 
    % "segmentIndices" specifies the segments, in which the SNP counts must
    % be reported. It may be a row or a column vector, or empty.
    %
    % "counts" has one row per requested segment: [n1, n0], i.e. the number
    % of SNP positions within [first, last] and the remaining positions of
    % the segment.
    
    % Normalize to a row vector. A column vector would otherwise make the
    % loop below run only once, with the whole vector as its loop value.
    segmentIndices = segmentIndices(:)';
    nRequested = numel(segmentIndices);
    
    counts = zeros(nRequested, 2); % n1, n0
    
    for k = 1:nRequested
        first = segments(segmentIndices(k), 1);
        last = segments(segmentIndices(k), 2);
        counts(k, 1) = nnz(data.snpPositions >= first & data.snpPositions <= last);
    end
    
    segmentLengths = segments(segmentIndices,2) - segments(segmentIndices,1) + 1;
    counts(:,2) = segmentLengths - counts(:,1);
    
end


function logBfForKeepingSeparate = computeBinomialLogml(counts, priorCounts)
    % Log Bayes factor for modelling two clusters separately rather than
    % as one combined cluster.
    %
    % "counts" must be a 2*2 table, such that rows represent counts [n1,n0]
    % in two different clusters.
    % 
    % "priorCounts" is [alpha, beta], which are the parameters of the beta 
    % distribution for SNP frequency.
    
    % Rows: cluster 1, cluster 2, and the two clusters combined
    allCounts = [counts; sum(counts, 1)];
    prior = repmat(priorCounts(1,:), 3, 1);
    posterior = prior + allCounts;
    
    % Log marginal likelihood of each row. The terms are combined in a
    % fixed left-to-right order.
    priorNormTotal = gammaln(sum(prior, 2));
    priorNormParts = sum(gammaln(prior), 2);
    posteriorParts = sum(gammaln(posterior), 2);
    posteriorTotal = gammaln(sum(posterior, 2));
    logml = priorNormTotal - priorNormParts + posteriorParts - posteriorTotal;
    
    logBfForKeepingSeparate = logml(1) + logml(2) - logml(3);
    
end
