#include <algorithm>
#include <string>
#include <limits>
#include <assert.h>
#include "sequence.h"

#include <iostream>
#include <fstream>
#include <sstream>

// Build a sequence from the file `name`.
// p.sparse picks the reader (read_sparse vs. read_full); p.sid is handed
// to that reader as its `readsid` flag.
sequence::sequence(const char *name, const params & p)
{
	if (!p.sparse) {
		read_full(name, p.sid);
		return;
	}

	read_sparse(name, p.sid);
}



// Thin forward-iterator wrapper around an event pointer. Advancing follows
// the `similar` TAILQ link, i.e. the list that read_full/read_sparse build
// per label in m_events.
struct eventiterator{
	// Event currently referred to.
	const sequence::event *e;

	// Yield the current event pointer.
	const sequence::event * operator * ()
	{
		return e;
	}

	// Move to the next event on the `similar` list.
	eventiterator & operator ++ ()
	{
		e = TAILQ_NEXT(e, similar);
		return *this;
	}

	// Two iterators are equal exactly when they refer to the same event.
	bool operator == (const eventiterator & other) const
	{
		return e == other.e;
	}

	bool operator != (const eventiterator & other) const
	{
		return !(e == other.e);
	}
};




void
sequence::read_full(const char *name, bool readsid)
{
	uint32_t cnt = 0;
	uint32_t tcnt = 0;

	std::ifstream f(name);

	std::string l;

	uint32_t scnt = 1;

	while (f >> l) {
		if (readsid && l == "-1") { scnt++; continue;}
		if (m_labelmap.find(l) == m_labelmap.end())
			m_labelmap[l] = cnt++;
		tcnt++;
	}

	f.clear();
	f.seekg(0);

	m_sequence.resize(scnt);
	m_events.resize(cnt);
	m_labels.resize(cnt);
	m_counts.resize(cnt);
	m_data.resize(tcnt);

	for (uint32_t i = 0; i < cnt; i++)
		TAILQ_INIT(&m_events[i]);
	for (uint32_t i = 0; i < scnt; i++)
		TAILQ_INIT(&m_sequence[i]);

	for (symmap::iterator it = m_labelmap.begin(); it != m_labelmap.end(); ++it)
		m_labels[it->second] = it->first;


	uint32_t c = 0;
	uint32_t sid = 0;
	while (f >> l) {
		if (readsid && l == "-1") {
			sid++;
			continue;
		}
		int32_t id = m_labelmap[l];

		event & e = m_data[c];
		e.s.index = c;
		e.s.sid = sid;
		e.label = id;
		e.id = c;
		TAILQ_INSERT_TAIL(&m_sequence[sid], &e, entries);
		TAILQ_INSERT_TAIL(&m_events[id], &e, similar);
		m_counts[id]++;
		c++;
	}

	compute_counts();
}

bool
eventcomp(const sequence::event & e, const sequence::event & f)
{
	return e.s < f.s || (e.s == f.s && e.id < f.id);
}


void
sequence::read_sparse(const char *name, bool readsid)
{
	uint32_t cnt = 0;
	index_t ind;
	uint32_t tcnt = 0;
	uint32_t scnt = 0;

	std::ifstream f(name);


	std::string l;

	while (f >> ind >> l) {
		uint32_t sid = 0;
		if (readsid) f >> sid;
		scnt = std::max(sid, scnt);
		if (m_labelmap.find(l) == m_labelmap.end())
			m_labelmap[l] = cnt++;
		tcnt++;
	}

	f.clear();
	f.seekg(0);

	m_sequence.resize(scnt);

	m_events.resize(cnt);
	m_labels.resize(cnt);
	m_data.resize(tcnt);

	for (uint32_t i = 0; i < cnt; i++)
		TAILQ_INIT(&m_events[i]);
	for (uint32_t i = 0; i < scnt; i++)
		TAILQ_INIT(&m_sequence[i]);

	for (symmap::iterator it = m_labelmap.begin(); it != m_labelmap.end(); ++it)
		m_labels[it->second] = it->first;

	uint32_t c = 0;
	cnt = 0;
	while (f >> ind >> l) {
		uint32_t sid = 0;
		if (readsid) f >> sid;

		int32_t id = m_labelmap[l];

		event & e = m_data[c];
		e.s.index = ind;
		e.s.sid = sid; 
		e.label = id;
		e.id = cnt++;
		c++;
	}

	std::sort(m_data.begin(), m_data.end(), eventcomp);

	for (uint32_t i = 0; i < m_data.size(); i++) {	
		event & e = m_data[i];
		TAILQ_INSERT_TAIL(&m_sequence[e.s.sid], &e, entries);
		TAILQ_INSERT_TAIL(&m_events[e.label], &e, similar);
		e.id = i;
	}

	compute_counts();
}

void
sequence::compute_counts()
{
	m_total = 0;
	for (uint32_t i = 0; i < m_sequence.size(); i++) {
		m_total += length(i);
	}
}




// "Greater than" ordering on event pointers: by position `s`, then by id.
// Passed to the std heap algorithms in substringiterator; because the
// comparison is reversed, the heap front is the smallest event.
bool
sseventcomp(const sequence::event * a, const sequence::event * b)
{
	if (a->s > b->s)
		return true;
	return a->s == b->s && a->id > b->id;
}

// Prepare a merged walk over the events of `s` whose label is listed in
// `labels`. The heap is seeded with the first event of each label's list
// (labels without events are skipped) and ordered with sseventcomp.
substringiterator::substringiterator(const sequence & s, const symbolvector & labels) :
	m_heap(labels.size()), m_size(0)
{
	uint32_t k = 0;
	while (k < labels.size()) {
		// Write into the next free slot; keep it only if the list was
		// non-empty, otherwise the slot is reused by the next label.
		m_heap[m_size] = TAILQ_FIRST(&s.events(labels[k]));
		if (m_heap[m_size])
			++m_size;
		++k;
	}

	std::make_heap(m_heap.begin(), m_heap.begin() + m_size, sseventcomp);
}

// Advance to the next event: drop the heap front and replace it with its
// successor on the same label's `similar` list, if there is one.
// Incrementing an already exhausted iterator is a no-op.
substringiterator &
substringiterator::operator ++()
{
	// Without this guard an exhausted iterator would decrement m_size
	// below zero and dereference a slot outside the live heap.
	if (m_size == 0)
		return *this;

	// pop_heap moves the current front to the last live slot.
	std::pop_heap(m_heap.begin(), m_heap.begin() + m_size, sseventcomp);
	m_size--;
	m_heap[m_size] = TAILQ_NEXT(m_heap[m_size], similar);

	// Re-insert the successor; a null successor means that label's list is
	// finished and the heap simply shrinks by one.
	if (m_heap[m_size]) {
		m_size++;
		std::push_heap(m_heap.begin(), m_heap.begin() + m_size, sseventcomp);
	}

	return *this;
}
