#include "maxfreq.h"
#include "border.h"
#include <stdio.h>
#include <math.h>
#include <getopt.h>
#include <assert.h>



// The input sequence: one real-valued observation per position.
typedef std::vector<double> doublevector;

// Number of score-function evaluations so far. Incremented by binomial(),
// gaussian() and poisson(); main() prints it after every layer.
uint32_t calls = 0;


// Standardize seq in place: subtract the sample mean and divide by the sample
// standard deviation (n - 1 in the denominator).
//
// Degenerate inputs no longer produce NaN/inf:
//   - an empty sequence is left untouched;
//   - with a single sample, or when the deviation is zero (all values equal),
//     the values are only centered, i.e. the divisor is taken to be 1.
void
normalize(doublevector & seq)
{
	if (seq.empty())
		return;

	double m = 0;
	for (uint32_t i = 0; i < seq.size(); i++)
		m += seq[i];
	m /= seq.size();

	// The sample deviation needs at least two observations; seq.size() - 1
	// would otherwise be a zero denominator.
	double v = 0;
	if (seq.size() > 1) {
		for (uint32_t i = 0; i < seq.size(); i++)
			v += (seq[i] - m)*(seq[i] - m);
		v /= (seq.size() - 1);
		v = sqrt(v);
	}

	// Dividing by a zero deviation would turn every element into NaN.
	const double scale = (v > 0) ? v : 1;

	for (uint32_t i = 0; i < seq.size(); i++)
		seq[i] = (seq[i] - m) / scale;
}



// Fill cur with the one-segment solution for every prefix of seq.
//
// For each position pos, cur[pos] receives
//   score  scorefun(sum of seq[0..pos], pos + 1)
//   index  -1 (printsegment stops its walk at a negative index)
//   max    freq() of the node returned by max() of a tracker fed seq[0..pos]
//   min    the same for a tracker fed the negated values, negated back
//
// cur must already hold at least seq.size() entries.
void
init(bordervector & cur, const doublevector & seq, double (*scorefun)(double, uint32_t))
{
	const size_t n = seq.size();

	// `lows` sees negated values, so its maximum is the negated minimum.
	maxfreq lows(n), highs(n);
	highs.lock(0);
	lows.lock(0);

	double total = 0;
	for (uint32_t pos = 0; pos < n; ++pos) {
		const double x = seq[pos];

		total += x;
		highs.add(pos, x);
		lows.add(pos, -x);

		cur[pos].score = scorefun(total, pos + 1);
		cur[pos].index = -1;
		cur[pos].max = highs.max()->freq();
		cur[pos].min = -lows.max()->freq();
	}
}


void
optimize(const bordervector & prev, bordervector & cur, const doublevector & seq, double (*scorefun)(double, uint32_t), FILE *logf, uint32_t r, bool opt = true, bool mdl = false)
{
	workvector works(seq.size());
	worklist act;
	TAILQ_INIT(&act);
	maxfreq minfr(seq.size()), maxfr(seq.size());

	double mdlpenalty = 1.5*log(seq.size());


	maxfr.lock(0);
	minfr.lock(0);
	maxfr.add(0, seq[0]);
	minfr.add(0, seq[0]);
	cur[0] = prev[0];

	uint32_t workload = 0;

	if (logf) // For the sake of completeness
		fprintf(logf, "%d 0 0\n", r);

	for (uint32_t i = 1; i < seq.size(); i++) {

		if (i % 1000 == 0) {
			printf("%d %d\r", i, workload);
			fflush(stdout);
		}

		// Update the structures
		maxfr.lock(i);
		maxfr.add(i, seq[i]);
		minfr.lock(i);
		minfr.add(i, -seq[i]);

		works[i].ind = i;
		TAILQ_INSERT_TAIL(&act, &works[i], active);
		workload++;
		
		work *w, *wnext;
		for (w = TAILQ_FIRST(&act); w; w = wnext) {
			wnext = TAILQ_NEXT(w, active);
			w->add(seq[i]);
			bool overlap = false;
			if (mdl)
				overlap = w->min <= cur[w->ind - 1].max && w->max >= cur[w->ind - 1].min;
			else
				overlap = w->min <= prev[w->ind - 1].max && w->max >= prev[w->ind - 1].min;

			if (overlap && opt) {
				if (logf)
					fprintf(logf, "%d %d %d\n", r, w->ind, i);
				//printf("Clean %d %d\n", i, w->ind);
				TAILQ_REMOVE(&act, w, active);
				maxfr.clean(w->ind);
				minfr.clean(w->ind);
				workload--;
			}
		}

		// find the maximum
		const maxfreq::node *maxn = maxfr.max();
		const maxfreq::node *minn = minfr.max();
		cur[i] = prev[i];

		TAILQ_FOREACH(w, &act, active) {
			while (w->ind > maxn->index) maxn = maxn->right;
			while (w->ind > minn->index) {minn = minn->right; assert(minn);}

			double score = scorefun(w->sum, w->count);
			
			if (mdl)
				score += cur[w->ind - 1].score - mdlpenalty;
			else
				score += prev[w->ind - 1].score; 

			if (score > cur[i].score) {
				cur[i].score = score;
				cur[i].index = w->ind - 1;
				cur[i].max = maxn->freq();
				cur[i].min = -minn->freq();
			}
		}
	}

	if (logf) {
		work *w;
		TAILQ_FOREACH (w, &act, active) {
			fprintf(logf, "%d %d %lu\n", r, w->ind, seq.size());
		}
	}
}

// Append every number parsed from f to seq.
// Reading stops at end of file or at the first token that "%lf" cannot
// convert; anything after that token is ignored.
void
readsequence(FILE *f, doublevector & seq)
{
	for (;;) {
		double value;
		if (fscanf(f, "%lf", &value) != 1)
			break;
		seq.push_back(value);
	}
}


// Alphabet size used by binomial(), which scores a segment of `count`
// observations against a maximum of count * (binomial_max - 1).
// Overridden from the command line with -a; defaults to 2.
uint32_t binomial_max = 2;

// Binomial score of a segment.
//
//   sum    sum of the observations in the segment
//   count  number of observations in the segment
//
// With m = count * (binomial_max - 1) and fr = sum / m the result is
//     sum * log(fr) + (m - sum) * log(1 - fr)
// where the first term is included only when sum > 0 and the second only when
// sum < m, so log(0) is never evaluated.
// NOTE(review): presumably the observations are integers in
// [0, binomial_max - 1], which makes m the largest possible sum -- confirm.
//
// Each call increments the global `calls` counter.
double
binomial(double sum, uint32_t count)
{
	calls++;
	// Computed in floating point: as a 32-bit product,
	// count * (binomial_max - 1) wraps around for long segments over a large
	// alphabet.
	const double m = static_cast<double>(count) * (binomial_max - 1);
	const double fr = sum / m;
	double res = 0;
	if (sum > 0)
		res += sum * log(fr);
	if (sum < m)
		res += (m - sum) * log(1 - fr);
	return res;
}

// Gaussian score of a segment: (sum / count) * sum / 2, i.e. half the squared
// sum divided by the number of observations.
// Each call increments the global `calls` counter.
double
gaussian(double sum, uint32_t count)
{
	++calls;

	const double mean = sum / count;
	return (mean * sum) / 2;
}

// Poisson score of a segment: -sum + sum * log(sum / count).
// The logarithmic term is left out unless sum > 0, so log(0) is never taken.
// Each call increments the global `calls` counter.
double
poisson(double sum, uint32_t count)
{
	++calls;

	double score = -sum;
	if (sum > 0) {
		const double rate = sum / count;
		score += sum * log(rate);
	}
	return score;
}


// Write the segmentation stored in b to f, one line "first last" per segment
// (zero-based, both ends inclusive), starting with the last segment.
//
// The walk begins at the final position of the last layer and follows the
// stored back-pointers until one of them is negative. Unless mdl is set, each
// step also moves one layer down; with mdl every back-pointer is read from
// the last layer.
void
printsegment(FILE *f, std::vector<bordervector> & b, bool mdl)
{
	uint32_t layer = b.size() - 1;
	int32_t last = b[0].size() - 1;

	while (last >= 0) {
		const int32_t before = b[layer][last].index;

		fprintf(f, "%d %d\n", before + 1, last);

		last = before;
		if (!mdl)
			layer--;
	}
}



// Command-line driver.
//
// Reads a whitespace-separated sequence of numbers from the input file,
// normalizes it (gaussian method only), computes the segmentation layer by
// layer and writes the resulting segments to the output file. After every
// layer a line with the number of score evaluations is printed to stdout.
//
// "-k 0" selects the MDL variant: two layers are allocated and optimize() is
// run with mdl = true.
//
// Returns 0 on success and 1 on a usage or I/O error.
int
main(int argc, char **argv)
{
	static struct option longopts[] = {
		{"out",             required_argument,  NULL, 'o'},
		{"in",              required_argument,  NULL, 'i'},
		{"log",             required_argument,  NULL, 'l'},
		{"method",          required_argument,  NULL, 'm'},
		{"k",               required_argument,  NULL, 'k'},
		{"noopt",           no_argument,        NULL, 'n'},
		{"help",            no_argument,        NULL, 'h'},
		// Takes a value, like the short form "a:". Declared as no_argument,
		// optarg would be NULL and atoi() below would dereference it.
		{"bmax",            required_argument,  NULL, 'a'},
		{ NULL,             0,                  NULL,  0 }
	};

	char *inname = NULL;
	char *outname = NULL;
	char *logname = NULL;
	FILE *logf = NULL;
	// Parsed as signed values so that negative input can be rejected instead
	// of wrapping around to a huge unsigned number.
	int segarg = 2;
	int alphabet = static_cast<int>(binomial_max);
	bool opt = true;
	bool mdl = false;
	bool norm = true;

	double (*scorefun)(double, uint32_t) = gaussian;



	int ch;
	while ((ch = getopt_long(argc, argv, "o:i:hnm:l:k:a:", longopts, NULL)) != -1) {
		switch (ch) {
			case 'h':
				printf("Usage: %s -i <input file> -o <output file> [-k segnum] [-m <method>] [options]\n", argv[0]);
				printf("  -h    print this help\n");
				printf("  -i    input file\n");
				printf("  -o    output file\n");
				printf("  -l    log file\n");
				printf("  -k    number of segments (default: 2)\n");
				printf("  -n    switch off optimizations\n");
				printf("  -a    alphabet size (used for binomial, default: 2)\n");
				printf("  -m    method:\n");
				printf("          g = gaussian (default)\n");
				printf("          p = poisson\n");
				printf("          b = binomial\n");

				return 0;
			case 'k':
				segarg = atoi(optarg);
				break;
			case 'i':
				inname = optarg;
				break;
			case 'n':
				opt = false;
				break;
			case 'm':
				// Only the first letter of the method name is significant.
				switch (optarg[0]) {
					case 'g':
						scorefun = gaussian;
						norm = true;
						break;
					case 'p':
						scorefun = poisson;
						norm = false;
						break;
					case 'b':
						scorefun = binomial;
						norm = false;
						break;
					default:
						fprintf(stderr, "Unknown method '%s', keeping the current one\n", optarg);
						break;
				}
				break;
			case 'l':
				// Opened later, once the arguments and the input are known
				// to be usable.
				logname = optarg;
				break;
			case 'a':
				alphabet = atoi(optarg);
				break;
			case 'o':
				outname = optarg;
				break;
		}
	}

	if (inname == NULL || outname == NULL) {
		printf("Missing files\n");
		return 1;
	}

	if (segarg < 0) {
		fprintf(stderr, "Number of segments must not be negative\n");
		return 1;
	}

	// binomial() divides by count * (alphabet - 1).
	if (scorefun == binomial && alphabet < 2) {
		fprintf(stderr, "Alphabet size must be at least 2\n");
		return 1;
	}
	binomial_max = static_cast<uint32_t>(alphabet);

	doublevector seq;

	FILE *f = fopen(inname, "r");
	if (f == NULL) {
		perror(inname);
		return 1;
	}
	readsequence(f, seq);
	fclose(f);

	// The layers below index the first and the last element unconditionally.
	if (seq.empty()) {
		fprintf(stderr, "%s: no numbers could be read\n", inname);
		return 1;
	}

	if (norm)
		normalize(seq);

	uint32_t segcnt = static_cast<uint32_t>(segarg);
	if (segcnt == 0) {
		segcnt = 2;
		mdl = true;
	}

	if (logname != NULL) {
		logf = fopen(logname, "w");
		// The log is optional: report the problem and carry on without it.
		if (logf == NULL)
			perror(logname);
	}

	std::vector<bordervector> borders(segcnt);

	borders[0].resize(seq.size());
	init(borders[0], seq, scorefun);

	for (uint32_t i = 1; i < borders.size(); i++) {
		borders[i].resize(seq.size());
		optimize(borders[i - 1], borders[i], seq, scorefun, logf, i, opt, mdl);
		// Evaluations an unpruned run would need: one per prefix in init()
		// plus one per (start, end) pair in each of the i layers.
		double maxcalls = uint64_t(seq.size()) + i*double(seq.size() - 1) *seq.size() / 2;
		printf("\nCalls %u %.0f %f %f\n", calls, maxcalls, double(calls) / maxcalls, borders[i].back().score);
	}

	if (logf != NULL)
		fclose(logf);

	FILE *out = fopen(outname, "w");
	if (out == NULL) {
		perror(outname);
		return 1;
	}
	printsegment(out, borders, mdl);
	fclose(out);

	return 0;
}
