run.evolution <- function(simulation.name, variables.in.grid.names, variables.in.grid.values, n.generations, n.strains, n.core.genes.to.simulate, n.features.per.gene, gene.length, mutation.rate, recombination.rate, recombination.acceptance.par, genome.size, fitness.cost.per.excess.gene, deletion.rate, novel.gene.introduction.rate, hgt.rate, fixed.core.size, hgt.acceptance.par, save.interval, save.path, save.extension, print.output=TRUE, population=NULL, start.generation=1) {

	# A population from a previous simulation can be given as 
	# the "population" parameter. The index of the first generation
	# to be simulated is specified by "start.generation". So, if
	# a population is given, start.generation should be the generation
	# for the population + 1.
	
	# Initialization of the population, unless given as a parameter:
	if (is.null(population)) {
		population <- initialize.data.structures(genome.size=genome.size, n.strains=n.strains, gene.presence.probability=1.0, n.features.per.gene=n.features.per.gene, n.core.genes.to.simulate=n.core.genes.to.simulate, gene.length=gene.length, fixed.core.size=fixed.core.size)
		# population has three fields:
		# n.strains, core.genes, gene.indicators, event.counts.
	}
	
	time.start <- proc.time()['elapsed']
	
	# Simulate the given number of generations
	for (generation.index in seq(start.generation,n.generations)) {
		
		# Sample descendents
		population <- sample.offspring(pop=population, genome.size=genome.size, fitness.cost.per.excess.gene=fitness.cost.per.excess.gene)		
		
		
		# Save at regular intervals
		if (generation.index%%save.interval == 0) {
			elapsed <- proc.time()['elapsed']-time.start
			save(population, elapsed, file=paste(save.path, '/gen', generation.index, save.extension, '.RData', sep=''))
#			
			if (print.output) {
				print(ncol(population$gene.indicators))
				print(paste('Gen ', generation.index, ' complete. Time=', elapsed, '.', sep=''))
			}
		}
		
		
		# Simulate detailed evolution for core genes
		if (n.core.genes.to.simulate>0) {
			
			# Mutations
			population <- add.mutations(pop=population, mutation.rate=mutation.rate, n.genes=n.core.genes.to.simulate)
			
			# Recombinations
			population <- add.core.recombinations(pop=population, recombination.rate=recombination.rate, recombination.acceptance.par=recombination.acceptance.par, gene.length=gene.length, n.genes=n.core.genes.to.simulate)
		
		}
		
		
		# Simulate evolution of the genome content		
		if (genome.size > 0) {	
			
			# Gene deletions
			population <- remove.genes(pop=population, deletion.rate=deletion.rate)
			
			# Introductions of novel genes
			population <- add.novel.acquisitions(pop=population, novel.gene.introduction.rate=novel.gene.introduction.rate)
			
			# Recombinations
			population <- add.horizontal.gene.transfer(pop=population, hgt.rate=hgt.rate, hgt.acceptance.par=hgt.acceptance.par)
		
		}
		
	}

	return(population)
}



################################
## Initialize data structures ##
################################
initialize.data.structures <- function(genome.size, n.strains, gene.presence.probability, n.features.per.gene, n.core.genes.to.simulate, gene.length=gene.length, fixed.core.size) {
	
	# Try loading the mapping from block distances to Hamming distances
	mapping.file.name <- paste('mapping_feat', n.features.per.gene, '_len', gene.length, '.RData', sep='')
	if (file.exists(mapping.file.name)) {
		load(mapping.file.name)
		# block.to.hamming.mapping (first element tells the expected Hamming
		# distance when block distance is 0, and so on...)
	} else {
		stop('Block to Hamming mapping does not exist for specified parameters')
	}
	
	pop <- list()
	pop$n.strains <- n.strains
	pop$block.to.hamming <- block.to.hamming.mapping
	if (n.core.genes.to.simulate > 0) {	
		# Initialize core genes for which detailed evolution is simulated.
		# Each gene is represented by n.features.per.gene integers.
		pop$core.genes <- matrix(1, nrow=n.strains, ncol=n.features.per.gene*n.core.genes.to.simulate)
	} else {
		pop$core.genes <- NULL
	}
	
	if (genome.size > 0) {
		# Initialize genome content
		values <- ifelse(runif(n=n.strains*genome.size)<gene.presence.probability,1,0)
		pop$gene.indicators <- matrix(values, nrow=n.strains, ncol=genome.size, byrow=T)
		if (fixed.core.size > 0) {
			pop$gene.indicators[,seq(1,fixed.core.size)] <- 1
		}
		
	} else {
		pop$gene.indicators <- NULL
	}
	pop$fixed.core.size <- fixed.core.size
	
	# Variable to record the realized number of events in the history of 
	# the population
	pop$event.counts <- list()
	pop$event.counts$core.mut <- 0
	pop$event.counts$core.rec <- 0
	pop$event.counts$rec.snp.counts <- rep(0, 500)
	# First element is the number of recombinations with zero SNPs, and so on.
	#pop$event.counts$rec.strain.dist <- rep(0, 51)
	# 0, 0.001, 0.002, 0.003, ..., 0.050. Used for recording the core distance
	# between the donor and recipient of a recombination.
	
	pop$event.counts$novel.introduction <- 0
	pop$event.counts$deletion <- 0
	pop$event.counts$hgt.acquisition <- 0
	pop$event.counts$hgt.deletion <- 0
	
	return(pop)
}



sample.offspring <- function(pop, genome.size, fitness.cost.per.excess.gene) {
	# Sample descendants with replacement from the original strains.
	#
	# Every strain that has more genes than the total genome size 
	# has fitness disadvantage which is equal to the specified cost 
	# raised to the power of the number of excess genes.
	
	n.strains <- pop$n.strains
	
	if (!is.null(pop$gene.indicators)) {
		
		n.genes.in.strains <- apply(pop$gene.indicators,1,sum)
		n.excess.genes.in.strains <- pmax(n.genes.in.strains-genome.size, 0)
		fitnesses <- fitness.cost.per.excess.gene^n.excess.genes.in.strains
		
		# The fixed core appears in the beginning of the gene indicators.
		# If any of these is missing, the fitness is immediately zero.
		if (pop$fixed.core.size > 0) {
			n.core.genes.present <- rowSums(pop$gene.indicators[,seq(1,pop$fixed.core.size), drop=FALSE])
			zero.fitness.strains <- which(n.core.genes.present<pop$fixed.core.size)
			if (length(zero.fitness.strains)>0) {
				fitnesses[zero.fitness.strains] <- 0
			}
		}
		
	} else {
	
		fitnesses <- rep(1, n.strains)
	}
	
	if (!is.null(pop$fitness.coef)) {
		fitnesses <- fitnesses*pop$fitness.coef
	}
	
	offspring.indices <- sample(x=n.strains, size=n.strains, replace=T, prob=fitnesses)	
	
	# (The following code avoids allocation of new tables..)
	to.remove <- setdiff(1:n.strains, offspring.indices)
	to.be.used.in.replace <- offspring.indices[duplicated(offspring.indices)]
	
	if (!is.null(pop$gene.indicators)) {
		pop$gene.indicators[to.remove,] <- pop$gene.indicators[to.be.used.in.replace,]
		pop <- remove.empty.genes(pop=pop)
	}
	
	if (!is.null(pop$core.genes)) {
		pop$core.genes[to.remove,] <- pop$core.genes[to.be.used.in.replace,]
	}
	
	if (!is.null(pop$fitness.coef)) {
		pop$fitness.coef[to.remove] <- pop$fitness.coef[to.be.used.in.replace]
	}
	
	return(pop)
}



add.mutations <- function(pop, mutation.rate, n.genes) {
	# Randomly select the given number of strains
	# to be mutated at each core gene.
	
	n.strains <- pop$n.strains
	n.features.per.gene <- ncol(pop$core.genes) / n.genes
	n.strains.to.mutate <- rbinom(n=n.genes, size=n.strains, prob=mutation.rate/n.strains)
	
	pop$event.counts$core.mut <- pop$event.counts$core.mut + sum(n.strains.to.mutate)
	
	for (gene.index in 1:n.genes) {
		if (n.strains.to.mutate[gene.index] > 0) {

			strains.to.mutate <- sample(x=n.strains, size=n.strains.to.mutate[gene.index])
			
			for (strain.index in strains.to.mutate) {
				feature.to.mutate <- (gene.index-1)*n.features.per.gene + sample(x=n.features.per.gene, size=1)
				pop$core.genes[strain.index, feature.to.mutate] <- pop$core.genes[strain.index, feature.to.mutate] + 1
			}
		}
	}
	return(pop)
}



add.core.recombinations <- function(pop, recombination.rate, recombination.acceptance.par, gene.length, n.genes) {
	# Randomly select pairs of strains to recombine and try if the 
	# recombination is accepted.
	
	n.strains <- pop$n.strains
	n.features.per.gene <- ncol(pop$core.genes)/n.genes
	
	# Loop over all core genes. Add recombinations at each.
	for (gene.index in 1:n.genes) {
		
		n.rec.trials <- rpois(n=1, lambda=recombination.rate)
		
		if (n.rec.trials > 0) {
			
			for (trial.index in 1:n.rec.trials) {
				
				# Sample recipient and donor
				pair.of.strains <- sample(n.strains, 2)
				recipient <- pair.of.strains[1]
				donor <- pair.of.strains[2]
				
				# Calculate local divergence of recipient and donor
				# sequences.	
				divergence <- calculate.divergence(gene.index=gene.index, pop=pop, n.features.per.gene=n.features.per.gene, recipient=recipient, donor=donor)
				
				# Check the acceptance of the recombination
				acceptance.probability <- 10^(-recombination.acceptance.par*divergence)
				if (runif(n=1) < acceptance.probability) {
					# Recombination trial accepted
					
					sites.to.recombine <- get.feature.cols(gene.indexes=gene.index, n.features.per.gene=n.features.per.gene)
					
					# Compute block distance for bookkeeping.
					block.distance <- sum(abs(pop$core.genes[recipient,sites.to.recombine]-pop$core.genes[donor,sites.to.recombine]))
					# Note: this must be mapped to Hamming distance before computing statistics such as the r/m.
					
					# Record recombination statistics
					pop$event.counts$core.rec <- pop$event.counts$core.rec + 1
					pop$event.counts$rec.snp.counts[block.distance+1] <- pop$event.counts$rec.snp.counts[block.distance+1] + 1
					
					# Update the recipient
					pop$core.genes[recipient,sites.to.recombine] <- pop$core.genes[donor,sites.to.recombine]
				}
			}
		}
	}
	return(pop)
}


remove.genes <- function(pop, deletion.rate) {
	
	n.strains <- pop$n.strains
	n.genes <- ncol(pop$gene.indicators)
	n.core.genes <- length(which(colSums(pop$gene.indicators)==nrow(pop$gene.indicators)))
	
	n.genes.to.remove <- rpois(n=1, lambda=deletion.rate*n.core.genes)
	
	if (n.genes.to.remove > 0) {
		genes.to.remove <- sample(n.strains*n.genes, n.genes.to.remove)
		n.genes.present.before.removal <- sum(pop$gene.indicators[genes.to.remove])
		pop$gene.indicators[genes.to.remove] <- 0
		
		pop$event.counts$deletion <- pop$event.counts$deletion + n.genes.present.before.removal
	}
	
	# Remove all genes which are not present in any strain.
	if (any(colSums(pop$gene.indicators)==0)) {
		pop <- remove.empty.genes(pop=pop)
	}
	
	return(pop)

}



add.novel.acquisitions <- function(pop, novel.gene.introduction.rate) {
	
	require(e1071) # needed for rdiscrete
#	
	n.strains <- pop$n.strains
	n.genes <- ncol(pop$gene.indicators)
	n.core.genes <- length(which(colSums(pop$gene.indicators)==nrow(pop$gene.indicators)))
	
	#################################
	## Acquisitions of novel genes ##
	#################################
	n.novel.acquisitions <- rpois(n=1, lambda=novel.gene.introduction.rate*n.core.genes)

	if (n.novel.acquisitions > 0) {
		
		recipients <- sample(x=n.strains, size=n.novel.acquisitions, replace=T)
		columns.to.add <- matrix(0, nrow=n.strains, ncol=n.novel.acquisitions)
		
		if (n.novel.acquisitions > 1) {
			indices <- recipients + seq(0,n.strains*(n.novel.acquisitions-1), by=n.strains)
		} else {
			indices <- recipients
		}
		columns.to.add[indices] <- 1
		
		pop$gene.indicators <- cbind(pop$gene.indicators, columns.to.add)
		
		pop$event.counts$novel.introduction <- pop$event.counts$novel.introduction + n.novel.acquisitions
	}
	
	return(pop)
}



add.horizontal.gene.transfer <- function(pop, hgt.rate, hgt.acceptance.par) {
	# Randomly select pairs of strains to recombine and try if the 
	# recombination is accepted.
	
	n.strains <- pop$n.strains
	n.genes <- ncol(pop$gene.indicators)
	
	for (gene.index in 1:n.genes) {
		
		n.rec.trials <- rpois(n=1, lambda=hgt.rate)
		
		if (n.rec.trials > 0) {
			for (trial.index in 1:n.rec.trials) {
				# Sample recipient and donor
				pair.of.strains <- sample(n.strains, 2)
				recipient <- pair.of.strains[1]
				donor <- pair.of.strains[2]
				
				if (pop$gene.indicators[donor, gene.index] != pop$gene.indicators[recipient, gene.index]) {
					# The donor and the recipient must have different values.
					
					# Calculate Jaccard
					jaccard.dist <- length(which(pop$gene.indicators[donor,]!=pop$gene.indicators[recipient,])) / length(which(pop$gene.indicators[donor,]==1 | pop$gene.indicators[recipient,]==1))
					
					# Check the acceptance of the recombination
					acceptance.probability <- 10^(-hgt.acceptance.par*jaccard.dist)
					if (runif(n=1) < acceptance.probability) {
						# Recombination trial accepted	
						
						if (pop$gene.indicators[donor,gene.index]==1) {
							# Presence of the gene was donated
							pop$event.counts$hgt.acquisition <- pop$event.counts$hgt.acquisition + 1
						} else {
							# Absence of the gene was donated
							pop$event.counts$hgt.deletion <- pop$event.counts$hgt.deletion + 1
						}
						
						pop$gene.indicators[recipient,gene.index] <- pop$gene.indicators[donor,gene.index]
					}
				}
			}
		}
	}
	return(pop)
}