#include "episode.h"
#include <algorithm>
#include <assert.h>

#include <boost/math/tools/roots.hpp>
#include <boost/math/tools/tuple.hpp>
#include <boost/math/distributions/poisson.hpp>
#include <boost/math/distributions/normal.hpp>

#include <gsl/gsl_linalg.h>


#include <iostream>
#include <fstream>
#include <sstream>


// Logarithm of a one-sided Gaussian tail probability.
//
// With z = |mean - a| / sqrt(2 * var) the function evaluates
//     log( t * exp(-z*z + P(t)) / 2 ),   t = 1 / (1 + z / 2),
// where P is the degree-9 polynomial given by the coefficient table below.
// The expression t * exp(-z*z + P(t)) is a polynomial approximation of
// erfc(z), so the result approximates log(erfc(z) / 2). Everything is
// accumulated in log-space, so the value stays finite even when the
// probability itself would underflow.
//
// Only |mean - a| enters the formula: the result is the log-probability of
// the tail lying beyond a on the side away from the mean (upper tail for
// a >= mean, lower tail for a <= mean).
//
//   a     point at which the tail starts
//   mean  mean of the normal distribution
//   var   variance (not the standard deviation) of the distribution
static double
normallogcdf(double a, double mean, double var)
{
	static const double poly[10] = {
		-1.26551223, 1.00002368, 0.37409196, 0.09678418, -0.18628806,
		0.27886807, -1.13520398, 1.48851587, -0.82215223, 0.17087277
	};

	const double z = (mean - a) / sqrt(2 * var);
	const double t = 1 / (1 + 0.5 * fabs(z));

	// log of t * exp(-z^2 + P(t)); P(t) is summed term by term from the
	// constant coefficient upwards
	double logtail = -z*z + log(t);
	double power = 1;

	for (const double *c = poly; c != poly + 10; ++c) {
		logtail += *c * power;
		power *= t;
	}

	// the factor 1/2 turning erfc into a one-sided tail
	return logtail - log(2);
}



// Creates an episode with the given id over the given event labels.
//
// Besides storing the events, the constructor numbers the distinct labels
// 0, 1, ... in sorted order and records
//   m_indexlabel[k]   the k-th distinct label,
//   m_labelindex[l]   the number assigned to label l,
//   m_indexmap[i]     the number assigned to the label of event i.
// m_psize is the number of distinct labels plus one; the extra last slot is
// the one support_count() uses for gap events. m_probs gets m_psize entries
// and m_target m_psize + 3 entries.
//
// An empty event list is accepted and yields m_psize == 1 (only the gap
// slot); the label tables then stay empty.
episode::episode(uint32_t id, const symbolvector & events) :
	m_id(id),
	m_events(events), m_indexmap(events), m_edges(events.size()), m_support(0), 
	m_transitions(events.size()) 
{
	// (label, event position) pairs; sorting brings equal labels together
	std::vector<std::pair<symbol_t, uint32_t> > sym(m_events.size());

	for (uint32_t i = 0; i < m_events.size(); i++) {
		sym[i].first = m_events[i];
		sym[i].second = i;
	}

	std::sort(sym.begin(), sym.end());

	// Walk the sorted pairs; a new number is opened whenever the label
	// changes. Starting the loop at 0 (instead of special-casing sym[0])
	// keeps the empty case from touching a non-existent element.
	uint32_t distinct = 0;
	for (uint32_t i = 0; i < sym.size(); i++) {
		if (i == 0 || sym[i].first != sym[i - 1].first) {
			m_indexlabel.push_back(sym[i].first);
			m_labelindex[sym[i].first] = distinct;
			distinct++;
		}
		m_indexmap[sym[i].second] = distinct - 1;
	}

	// one slot per distinct label plus one for gap events
	m_psize = distinct + 1;

	m_probs.resize(m_psize);
	m_target.resize(m_psize + 3);
}


// Builds the state machine of the episode from its edge graph.
//
// The initial in-degrees and the initial sources are obtained from m_edges,
// buildbasic() then creates the machine recursively and returns the start
// state, which is stored in m_source. The state reached once no source is
// left (looked up under the empty source list) is stored in m_sink.
//
// buildbasic() appends one prefix per state that still has sources. The
// first one belongs to the start state, where no event has been used yet,
// and is removed here. If the start state has no sources at all, no prefix
// was recorded and nothing is removed.
void
episode::build() {
	intvector degree(m_events.size());
	boolvector used(m_events.size(), false);
	intlist sources;
	idmap ids;

	m_edges.init_degree(degree, sources);

	m_source = buildbasic(degree, sources, ids, used);

	// first prefix is the empty one, so get rid of it (guarded: popping an
	// empty container is undefined behaviour)
	if (!m_prefixes.empty())
		m_prefixes.pop_front();

	intlist empty;
	m_sink = ids[empty];
}

// Recursively creates the machine state identified by a list of sources.
//
//   degree   in-degree per event for the current state
//   sources  events that can be consumed next; also the key of the state
//   nodeids  states created so far, keyed by their source list
//   used     flags of the events consumed on the way to this state; it is
//            modified during the recursion and restored before returning
//
// Returns the state for `sources`, creating it (and everything reachable
// from it) if it does not exist yet. For every source an edge labelled with
// the event's symbol is bound to the successor state and also appended to
// m_transitions for that event.
episode::basicmachine::node *
episode::buildbasic(const intvector & degree, const intlist & sources, idmap & nodeids, boolvector & used)
{
	// a state with this source list may already exist
	idmap::iterator known = nodeids.find(sources);
	if (known != nodeids.end())
		return known->second;

	basicmachine::node *state = m_machine.add(nodeids.size());
	nodeids[sources] = state;
	state->value.estates.resize(m_psize);
	state->value.ecounts.resize(m_psize);

	// record the consumed-event set of every state except the one with no
	// sources left (avoid the full graph)
	if (sources.size() != 0)
		m_prefixes.push_back(used);

	// sources already handled in this loop; they precede the later ones in
	// the successor's source list
	intlist earlier;

	for (intlist::const_iterator cur = sources.begin(); cur != sources.end(); ++cur) {
		intlist::const_iterator later = cur;
		++later;

		// source list without *cur, original order kept
		intlist remaining(earlier);
		remaining.insert(remaining.end(), later, sources.end());

		// lower_degree updates the copies for the consumption of *cur
		intvector lowered(degree);
		m_edges.lower_degree(*cur, lowered, remaining);

		used[*cur] = true;
		basicmachine::node *next = buildbasic(lowered, remaining, nodeids, used);
		used[*cur] = false;

		basicmachine::edge *link = m_machine.bind(state, next, m_events[*cur]);
		link->value.index = m_indexmap[*cur];
		m_transitions[*cur].push_back(link);

		earlier.push_back(*cur);
	}

	return state;
}


void
episode::support_count(const sequence & s)
{
	// pick unique labels
	symbolvector sym = m_events;
	std::sort(sym.begin(), sym.end());
	sym.resize(std::distance(sym.begin(), std::unique(sym.begin(), sym.end())));

	basicmachine::node *cur = m_source;
	const sequence::event *last = NULL;
	uint32_t sid = 0;
	
	m_support = 0;


	for (substringiterator it(s, sym); !it.done(); ++it) {
		if (sid != it->s.sid) {
			if (last != NULL) {
				//printf("TAIL %d\n", s.taillength(last));
				cur->value.count += s.taillength(last);
				cur->value.ecounts[m_psize - 1] += s.taillength(last); // gap events
			}
			cur = m_source;
			for (sid++; sid < it->s.sid; sid++) {
				cur->value.count += s.length(sid);
				cur->value.ecounts[m_psize - 1] += s.length(sid); // gap events
			}
			last = NULL;
		}

		index_t t;
		if (last != NULL)
			t = it->s.index - last->s.index;
		else
			t = s.headlength(*it) + 1;

		last = *it;
		cur->value.count += t;
		cur->value.ecounts[m_labelindex[it->label]]++;
		cur->value.ecounts[m_psize - 1] += t - 1; // gap events
		
		basicmachine::edge *e = cur->target(it->label);
		//printf("EDGES %d (%d) %p %p %d %d\n", it->label, m_labelindex[it->label], cur, e, t, sid);
		if (e) {
			if (m_sink == e->to) m_support++;
			cur = e->to;
		}
	}

	if (last != NULL) {
		cur->value.count += s.taillength(last);
		cur->value.ecounts[m_psize - 1] += s.taillength(last); // gap events
	}
	cur = m_source;
	for (sid++; sid < s.rowcount(); sid++) {
		cur->value.count += s.length(sid);
		cur->value.ecounts[m_psize - 1] += s.length(sid); // gap events
	}

	// XXX compute the support
}

// Event distribution of a state under the current parameters.
//
// The returned vector has m_probs.size() + 3 entries. Entry i of the first
// part is the normalised weight exp(m_probs[i] + m_trans[estates[i]]) of
// slot i in state n. Each of the three trailing entries is the sum of the
// leading entries whose estates value equals its offset.
doublevector
episode::computeprob(const basicmachine::node *n) const
{
	const doublevector::size_type nprobs = m_probs.size();
	doublevector dist(nprobs + 3);

	// largest log-weight; subtracted before exponentiating so that exp()
	// cannot overflow
	double top = -std::numeric_limits<double>::max();
	for (uint32_t k = 0; k < m_psize; k++)
		top = std::max(m_probs[k] + m_trans[n->value.estates[k]], top);

	for (uint32_t k = 0; k < m_psize; k++)
		dist[k] = exp(m_probs[k] + m_trans[n->value.estates[k]] - top);

	// normalise the leading part to sum to one
	double norm = 0;
	for (uint32_t k = 0; k < nprobs; k++)
		norm += dist[k];
	for (uint32_t k = 0; k < nprobs; k++)
		dist[k] /= norm;

	// per-estate totals in the three trailing entries
	for (uint32_t k = 0; k < m_psize; k++)
		dist[nprobs + n->value.estates[k]] += dist[k];

	return dist;
} 

// Event distribution of a state for parameters shifted along a direction.
//
// Same layout as the single-argument overload, but the log-weight of slot i
// is m_probs[i] + m_trans[e] + alpha * (diff[i] + diff[m_probs.size() + e])
// with e = estates[i]; i.e. the distribution that would result from moving
// the parameters by alpha * diff without actually changing them.
//
//   alpha  step length
//   diff   direction; at least m_probs.size() + 3 entries are read
doublevector
episode::computeprob(const basicmachine::node *n, double alpha, double *diff) const
{
	const doublevector::size_type nprobs = m_probs.size();
	doublevector dist(nprobs + 3);

	// largest shifted log-weight, used to keep exp() in range
	double top = -std::numeric_limits<double>::max();
	for (uint32_t k = 0; k < m_psize; k++) {
		const uint32_t st = n->value.estates[k];
		top = std::max(m_probs[k] + m_trans[st] + alpha*(diff[k] + diff[nprobs + st]), top);
	}

	for (uint32_t k = 0; k < nprobs; k++) {
		const uint32_t st = n->value.estates[k];
		dist[k] = exp(m_probs[k] + m_trans[st] + alpha*(diff[k] + diff[nprobs + st]) - top);
	}

	// normalise the leading part to sum to one
	double norm = 0;
	for (uint32_t k = 0; k < nprobs; k++)
		norm += dist[k];
	for (uint32_t k = 0; k < nprobs; k++)
		dist[k] /= norm;

	// per-estate totals in the three trailing entries
	for (uint32_t k = 0; k < m_psize; k++)
		dist[nprobs + n->value.estates[k]] += dist[k];

	return dist;
}

double
episode::cost(double alpha, double *diff) const
{
	double c = 0;
	for (basicmachine::nodemap::const_iterator it = m_machine.nodes().begin(); it != m_machine.nodes().end(); ++it) {
		const basicmachine::node *n = it->second;

		doublevector p = computeprob(n, alpha, diff);


		for (uint32_t i = 0; i < m_psize; i++) {
			uint32_t w = n->value.ecounts[i];
			if (w > 0) c += w*log(p[i]);
		}
	}

	return c;
}



void
episode::computetarget()
{
	m_target.assign(m_target.size(), 0);

	for (basicmachine::nodemap::iterator it = m_machine.nodes().begin(); it != m_machine.nodes().end(); ++it) {
		basicmachine::node *n = it->second;
		//printf("NODE: %p %d\n", n, n->value.count);

		for (uint32_t i = 0; i < m_psize; i++) {
			//printf("%d ", n->value.ecounts[i]);
			m_target[i] += n->value.ecounts[i];
			m_target[m_psize + n->value.estates[i]] += n->value.ecounts[i];
		}
		//printf("\n");
	}

	/*
	printf("TARGET: ");
	for (uint32_t i = 0; i < m_target.size(); i++)
		printf("%d ", m_target[i]);
	printf("\n");
	*/
}

void
episode::initparameters()
{
	for (uint32_t i = 0; i < m_probs.size(); i++) {
		m_probs[i] = 0; //log(m_target[i]);
	}

	for (uint32_t i = 0; i < 3; i++)
		m_trans[i] = 0; //log(1 + m_target[m_probs.size() + i]);
}

void
episode::updateparameters(double alpha, double *gradient)
{
	for (uint32_t i = 0; i < m_probs.size(); i++)
		m_probs[i] += alpha*gradient[i];

	for (uint32_t i = 0; i < 3; i++)
		m_trans[i] += alpha*gradient[m_probs.size() + i];

	double m = 0;	
	for (uint32_t i = 0; i < m_probs.size(); i++) m = std::max(m, m_probs[i]);
	for (uint32_t i = 0; i < m_probs.size(); i++) m_probs[i] -= m;

	m = 0;	
	for (uint32_t i = 0; i < 3; i++) m = std::max(m, m_trans[i]);
	for (uint32_t i = 0; i < 3; i++) m_trans[i] -= m;

	//for (uint32_t i = 0; i < m_probs.size(); i++) printf("%f, ", m_probs[i]);
	//printf("\n");
}


// Marks the edges of this machine that can be followed in parallel with a
// reference machine.
//
// Starting from the pair (n, ref), every outgoing edge of n whose label
// also has an edge in ref gets estate 1 in n, and the walk continues from
// the two edge targets. States already marked are not visited again.
void
episode::buildstates(basicmachine::node *n, const basicmachine::node *ref)
{
	if (n->value.mark)
		return;
	n->value.mark = true;

	for (basicmachine::edgemap::iterator out = n->to.begin(); out != n->to.end(); ++out) {
		// edge of the reference state carrying the same label, if any
		basicmachine::edge *mirror = ref->target(out->first);
		if (!mirror)
			continue;

		basicmachine::edge *own = out->second;
		n->value.estates[own->value.index] = 1;
		buildstates(own->to, mirror->to);
	}
}

void
episode::buildstates()
{
	basicmachine::nodevector nodes = m_machine.order();
	for (basicmachine::nodemap::iterator it = m_machine.nodes().begin(); it != m_machine.nodes().end(); ++it) {
		basicmachine::node *n = it->second;
		n->value.mark = false;
		n->value.estates.assign(m_psize, 0);
	}
}


void
episode::buildstates(const boolvector & prefix)
{
	basicmachine::nodevector nodes = m_machine.order();

	for (basicmachine::nodemap::iterator it = m_machine.nodes().begin(); it != m_machine.nodes().end(); ++it) {
		basicmachine::node *n = it->second;
		n->value.mark = false;
		n->value.estates.assign(m_psize, 0);
	}

	for (uint32_t i = 0; i < prefix.size(); i++) {
		if (prefix[i]) {
			for (basicmachine::edgelist::iterator it = m_transitions[i].begin(); it != m_transitions[i].end(); ++it) {
				basicmachine::edge *e = *it;
				e->to->value.mark = true;
			}
		}
	}

	for (uint32_t i = 0; i < nodes.size(); i++) {
		basicmachine::node *n = nodes[i];
		if (!n->value.mark) continue;
		for (basicmachine::edgemap::iterator it2 = n->to.begin(); it2 != n->to.end(); ++it2) {
			basicmachine::edge *e = it2->second;
			e->to->value.mark = true;
		}
	}


	for (uint32_t i = 0; i < prefix.size(); i++) {
		if (prefix[i]) {
			for (basicmachine::edgelist::iterator it = m_transitions[i].begin(); it != m_transitions[i].end(); ++it) {
				basicmachine::edge *e = *it;
				if (e->from->value.mark) {
					e->from->value.estates[e->value.index] = 1;
				}
			}
		}
	}

	for (basicmachine::nodemap::iterator it = m_machine.nodes().begin(); it != m_machine.nodes().end(); ++it) {
		basicmachine::node *n = it->second;
		n->value.mark = false;
	}

	for (uint32_t i = 0; i < prefix.size(); i++) {
		if (!prefix[i]) {
			for (basicmachine::edgelist::iterator it = m_transitions[i].begin(); it != m_transitions[i].end(); ++it) {
				basicmachine::edge *e = *it;
				e->to->value.mark = true;
			}
		}
	}

	for (uint32_t i = 0; i < nodes.size(); i++) {
		basicmachine::node *n = nodes[i];
		if (!n->value.mark) continue;
		for (basicmachine::edgemap::iterator it2 = n->to.begin(); it2 != n->to.end(); ++it2) {
			basicmachine::edge *e = it2->second;
			e->to->value.mark = true;
		}
	}

	for (uint32_t i = 0; i < prefix.size(); i++) {
		if (!prefix[i]) {
			for (basicmachine::edgelist::iterator it = m_transitions[i].begin(); it != m_transitions[i].end(); ++it) {
				basicmachine::edge *e = *it;
				if (e->from->value.mark) {
					e->from->value.estates[e->value.index] = 2;
				}
			}
		}
	}

}

void
episode::testsuper(const episode *ep, const sequence & s)
{
	buildstates();
	buildstates(m_source, ep->m_source);
	m_source->value.estates.assign(m_psize, 0);

	computetarget();

	initparameters();

	iterscale();
	settransitions();

	double mean, var;
	computemean(s, mean, var);

	double r;

	if (mean > 10) {
		if (m_support <= mean) {
			boost::math::normal d(mean, sqrt(var));
			r = log(boost::math::cdf(complement(d, m_support)));
		}
		else
			r = normallogcdf(m_support, mean, var);
	}
	else {
		boost::math::poisson d(mean);
		r = log(boost::math::cdf(complement(d, m_support)));
	}

	m_pred = std::max(m_pred, mean);
	m_rank = std::max(r, m_rank);

	// used for debugging
	//printf("%f %f\n", mean, var);
	//printf("SUPERRANK: %f %f %d %f\n", r, exp(r), m_support, normallogcdf(m_support, mean, var));
}

void
episode::testind(const sequence & s)
{
	buildstates();
	computetarget();

	initparameters();

	iterscale();
	settransitions();

	double mean, var;
	computemean(s, mean, var);


	if (mean > 10) {
		if (m_support <= mean) {
			boost::math::normal d(mean, sqrt(var));
			m_indrank = log(boost::math::cdf(complement(d, m_support)));
		}
		else
			m_indrank = normallogcdf(m_support, mean, var);
	}
	else {
		boost::math::poisson d(mean);
		m_indrank = log(boost::math::cdf(complement(d, m_support)));
	}

	m_indpred = mean;

	// used for debugging
	//printf("%f %f\n", mean, var);
	//printf("INDRANK: %f %f %d %f\n", m_indrank, exp(m_indrank), m_support, normallogcdf(m_support, mean, var));
}


void
episode::test(const sequence & s)
{
	if (m_prefixes.size() > 0)
		m_rank = -std::numeric_limits<double>::max();
	else
		m_rank = 0;
	
	m_pred = 0;
	
	for (prefixlist::iterator it = m_prefixes.begin(); it != m_prefixes.end(); ++it) {
		//printf("\nPREFIX: ");
		//for (uint32_t i = 0; i < m_events.size(); i++) if ((*it)[i]) printf("%d ", i);
		//printf("\n");
		/*for (uint32_t i = 0; i < m_events.size(); i++) if ((*it)[i]) printf("%d ", m_events[i]);
		printf("\n");*/

		buildstates(*it);
		computetarget();

		initparameters();


		iterscale();
		settransitions();

		double mean, var;
		computemean(s, mean, var);

		double r;

		if (mean > 10) {
			if (m_support <= mean) {
				boost::math::normal d(mean, sqrt(var));
				r = log(boost::math::cdf(complement(d, m_support)));
			}
			else
				r = normallogcdf(m_support, mean, var);
		}
		else {
			boost::math::poisson d(mean);
			r = log(boost::math::cdf(complement(d, m_support)));
		}

		//printf("%f %f %f %f\n", mean, var, r, normallogcdf(m_support, mean, var));
		m_pred = std::max(m_pred, mean);
		m_rank = std::max(m_rank, r);
	}

	//printf("RANK: %f %f %d\n", m_rank, exp(m_rank), m_support);
}

double
episode::error() const
{
	doublevector res(m_target.size());
	for (basicmachine::nodemap::const_iterator it = m_machine.nodes().begin(); it != m_machine.nodes().end(); ++it) {
		basicmachine::node *n = it->second;

		doublevector r = computeprob(n);
		for (uint32_t i = 0; i < r.size(); i++)
			res[i] += n->value.count * r[i];
	}
	/*
	for (uint32_t i = 0; i < res.size(); i++)
		printf("%f ", res[i]);
	printf("\n");
	*/

	double err = 0;
	for (uint32_t i = 0; i < res.size(); i++) 
		err = std::max(err, fabs(res[i] - m_target[i]));

	return err;
}


void
episode::computegradients(double *hessian, double *gradient) const
{
	uint32_t s = m_target.size();
	uint32_t p = m_psize;

	std::fill(hessian, hessian + s*s, 0);
	std::fill(gradient, gradient + s, 0);
	for (basicmachine::nodemap::const_iterator it = m_machine.nodes().begin(); it != m_machine.nodes().end(); ++it) {
		basicmachine::node *n = it->second;

		doublevector r = computeprob(n);

		for (uint32_t i = 0; i < s; i++) {
			gradient[i] -= n->value.count * r[i];
			hessian[i*s + i] += n->value.count * r[i];
		}

		for (uint32_t i = 0; i < p; i++) {
			hessian[i*s + p + n->value.estates[i]] += n->value.count * r[i];
			hessian[i + s*(p + n->value.estates[i])] += n->value.count * r[i];
		}


		for (uint32_t i = 0; i < s; i++) {
			for (uint32_t j = 0; j < s; j++) {
				hessian[i*s + j] -= n->value.count * r[i] * r[j];
			}
		}
	}

	for (uint32_t i = 0; i < s; i++) hessian[i*s + i] += 10e-5; // to control singularity

	/*
	printf("CURRENT: \n");
	for (uint32_t i = 0; i < s; i++)
		printf("%f ", -gradient[i]);
	printf("\n");
	*/

	for (uint32_t i = 0; i < s; i++)
		gradient[i] += m_target[i];


	/*
	for (uint32_t i = 0; i < s; i++) {
		for (uint32_t j = 0; j < s; j++) {
			printf("%.3f ", hessian[i*s + j]);
		}
		printf("\n");
	}
	*/
}


void
episode::settransitions()
{
	for (basicmachine::nodemap::iterator it = m_machine.nodes().begin(); it != m_machine.nodes().end(); ++it) {
		basicmachine::node *n = it->second;

		doublevector p = computeprob(n);

		/*
		for (uint32_t i = 0; i < p.size(); i++)
			printf("%f ", p[i]);
		printf(" (%d)\n", n->value.count);
		*/


		n->value.stay = 1;
		for (basicmachine::edgemap::iterator it2 = n->to.begin(); it2 != n->to.end(); ++it2) {
			basicmachine::edge *e = it2->second;
			e->value.p = p[e->value.index];
			n->value.stay -= e->value.p;
		}

		//printf("STAY: %f\n", n->value.stay);

		n->value.stay = std::max(n->value.stay, 0.0); // just in case for numerical stability
	}
}

// Propagates probability mass through the machine step by step.
//
// All mass starts in m_source. In every step a state keeps the fraction
// value.stay of its mass and receives mass * p over each incoming edge.
// p[t] is set to the mass found in m_sink after t steps, for
// t = 0 .. p.size() - 1. p must have at least one entry.
void
episode::computeexpectation(doublevector & p)
{
	basicmachine::nodevector nodes = m_machine.order();

	for (uint32_t k = 0; k < nodes.size(); k++)
		nodes[k]->value.mass = 0;

	m_source->value.mass = 1;
	p[0] = m_sink->value.mass;

	for (uint32_t step = 1; step < p.size(); step++) {
		// States are updated from the back of the ordering to the front.
		// NOTE(review): presumably order() lists predecessors first, so a
		// state still sees its predecessors' mass of the previous step -
		// confirm against the machine implementation.
		for (int32_t k = nodes.size() - 1; k >= 0; k--) {
			basicmachine::node *state = nodes[k];
			state->value.mass *= state->value.stay;

			for (basicmachine::edgemap::iterator in = state->from.begin(); in != state->from.end(); ++in) {
				basicmachine::edge *link = in->second;
				state->value.mass += link->from->value.mass * link->value.p;
			}
		}
		p[step] = m_sink->value.mass;
	}

}

void
episode::computemean(const sequence & s, double & mean, double & var)
{
	index_t m = 1;
	for (uint32_t i = 0; i < s.rowcount(); i++) {
		m = std::max(m, s.length(i));
	}

	doublevector p(m + 1);
	computeexpectation(p);

	mean = 0;
	var = 0;

	for (uint32_t i = 0; i < s.rowcount(); i++) {
		double q = p[s.length(i)];
		mean += q;
		var += q*(1 - q);
	}
}



void
episode::iterscale()
{
	uint32_t N = m_target.size();
	double *gradient = new double[N];
	double *hessian = new double[N*N];
	double *diff = new double[N];


	//printf("ERR: %f\n", error());

	//for (uint32_t i = 0; i < 10; i++) {
	while (error() > 10e-5) {
		computegradients(hessian, gradient);

		gsl_matrix_view m = gsl_matrix_view_array(hessian, N, N);
		gsl_vector_view b = gsl_vector_view_array(gradient, N);
		gsl_permutation *p = gsl_permutation_alloc (N);
		gsl_vector_view d = gsl_vector_view_array(diff, N);

		//gsl_matrix_fprintf(stdout, &m.matrix, "%f");

		int a;
		gsl_linalg_LU_decomp(&m.matrix, p, &a);
		gsl_linalg_LU_solve(&m.matrix, p, &b.vector, &d.vector);
		gsl_permutation_free(p);

		//gsl_vector_fprintf(stdout, &d.vector, "%g");

		double alpha = 1;
		double base = cost(0, diff);
		for (;cost(alpha, diff) <= base; alpha *= 0.5) {
			//printf("COST: %f %f\n", cost(alpha, diff), alpha);
			if (alpha < 10e-5) break; // stuck in the model.
		}

		if (alpha < 10e-5) {
			// newton failed, try standard gradient.
			alpha = 1; 
			for (;cost(alpha, gradient) <= base; alpha *= 0.5) {
				if (alpha == 0) break; // stuck in the model.
			}
			if (alpha == 0) break; // break the main loop, if we are stuck
			updateparameters(alpha, gradient);
		}
		else
			updateparameters(alpha, diff);
		//printf("ERR: %f\n", error());
	}



	delete [] hessian;
	delete [] gradient;
	delete [] diff;
}




/*
// Mainly used for debugging
void
episode::print(FILE *f) const
{
	fprintf(f, "graph: %d\n", m_id); 

	fprintf(f, "eventcnt: %lu\n", m_events.size());
	fprintf(f, "labels:");
	for (uint32_t i = 0; i < m_events.size(); i++) {
		fprintf(f, " %d %d", i, m_events[i]);
	}

	fprintf(f, "\n");

	m_weak.print(f);
	m_proper.print(f);
	fprintf(f, "m-support: %d\n\n", m_support);
}
*/

/*
void
episode::print(FILE *f) const
{
	fprintf(f, "graph: %d\n", m_id); 



	fprintf(f, "eventcnt: %lu\n", m_events.size());
	fprintf(f, "labels:");
	for (uint32_t i = 0; i < m_events.size(); i++) {
		fprintf(f, " %d", m_events[i]); 
	}

	fprintf(f, "\n");

	m_edges.print(f);
	fprintf(f, "m-support: %d\n\n", m_support);
}
*/

// Writes the id of the episode and its scores to `f`.
//
// Printed are the prefix-based rank, the independence rank, the differences
// between the observed support and the two predicted supports, the
// difference of the two ranks and that difference relative to each rank.
// The relative values are plain floating-point divisions; nothing special
// is done when a rank is zero.
void
episode::print(FILE *f) const
{
	fprintf(f, "graph: %d\n", m_id); 

	/*
	fprintf(f, "eventcnt: %lu\n", m_events.size());
	fprintf(f, "labels:");
	for (uint32_t i = 0; i < m_events.size(); i++) {
		fprintf(f, " %s", s.label(m_events[i]).c_str()); 
	}

	fprintf(f, "\n");


	m_edges.print(f);
	*/

	const double parterr = m_support - m_pred;
	const double inderr = m_support - m_indpred;
	const double rankdiff = m_rank - m_indrank;
	const double rankprop = (m_rank - m_indrank) / m_rank;
	const double rankprop2 = (m_indrank - m_rank) / m_indrank;

	fprintf(f, "m-partrank: %f\n", m_rank);
	fprintf(f, "m-indrank: %f\n", m_indrank);
	fprintf(f, "m-parterr: %f\n", parterr);
	fprintf(f, "m-inderr: %f\n", inderr);
	fprintf(f, "m-rankdiff: %f\n", rankdiff);
	fprintf(f, "m-rankprop: %f\n", rankprop);
	fprintf(f, "m-rankprop2: %f\n\n", rankprop2);
}

// Splits a string at space characters (' ' only; tabs and other whitespace
// are not separators) and returns the non-empty pieces in order.
stringvector
split(const std::string & s)
{
	stringvector pieces;

	std::string::size_type start = 0;
	while (start < s.size()) {
		std::string::size_type stop = s.find(' ', start);
		if (stop == std::string::npos)
			stop = s.size();

		// consecutive spaces produce empty pieces, which are dropped
		if (stop > start)
			pieces.push_back(s.substr(start, stop - start));

		start = stop + 1;
	}

	return pieces;
}


// Reads the next episode description from `f`.
//
// Blank lines are skipped until a line "graph: <id>" is found. The lines up
// to the next blank line (or end of file) are then scanned for
//   "nodes: <label> ..."             event labels, translated via s.index(),
//   "size: ..."                      ignored,
//   "edges: <from> <x> <to> ..."     triples; the middle token is ignored.
// Other lines are ignored. The episode is created from the labels, the
// edges are added, the transitive closure is taken and the machine is
// built.
//
// Returns a newly allocated episode owned by the caller, or 0 when the
// stream holds no further "graph:" line. Malformed input (an unexpected
// line before "graph:", a "graph:" line without an id, an edge list whose
// length is not a multiple of three, an edge endpoint that is not a valid
// node position) triggers an assertion.
episode *
read(const sequence & s, std::ifstream & f)
{
	std::string line;
	uint32_t id = 0;
	bool found = false;

	while (std::getline(f, line)) {
		stringvector toks = split(line);
		if (toks.size() == 0) continue;
		// the id token must be present before it is read
		if (toks[0] == "graph:" && toks.size() > 1) {
			id = atoi(toks[1].c_str());
			found = true;
			break;
		}
		assert(0);
	}

	if (!found) return 0;

	symbolvector labels;
	intvector from;
	intvector to;

	while (std::getline(f, line)) {
		stringvector toks = split(line);
		if (toks.size() == 0) break;

		if (toks[0] == "nodes:") {
			labels.resize(toks.size() - 1);			
			for (uint32_t i = 0; i < labels.size(); i++)
				labels[i] = s.index(toks[i + 1]);
		}
		else if (toks[0] == "size:") {
		}
		else if (toks[0] == "edges:") {
			assert(toks.size() % 3 == 1);
			from.resize((toks.size() - 1) / 3);
			to.resize((toks.size() - 1) / 3);

			// tokens come in triples; only the first and third are used
			for (uint32_t i = 0; i < from.size(); i++) {
				from[i] = atoi(toks[i*3 + 1].c_str());
				to[i] = atoi(toks[i*3 + 3].c_str());
			}

		}

	}

	episode *ep = new episode(id, labels);

	graph & g = ep->edges();

	for (uint32_t i = 0; i < from.size(); i++) {
		// the graph is created with one node per label
		assert(static_cast<symbolvector::size_type>(from[i]) < labels.size());
		assert(static_cast<symbolvector::size_type>(to[i]) < labels.size());
		g.add_edge(from[i], to[i]);
	}

	g.closure();
	ep->build();

	return ep;
}

// Reads every episode stored in the file `name`.
//
// Episodes are read one after another until the single-episode reader
// returns 0. The returned list holds the newly allocated episodes in file
// order; the caller owns them.
episodelist
read(const sequence & s, const char *name)
{
	episodelist loaded;
	std::ifstream in(name);

	for (;;) {
		episode *ep = read(s, in);
		if (!ep)
			break;
		loaded.push_back(ep);
	}

	return loaded;
}



