// Copyright 2005,2006 Jouni K. Seppänen          -*- coding: utf-8 -*-
// Distributed under the Boost Software License, Version 1.0.
// See accompanying file LICENSE.

#ifndef DENSE_H
#define DENSE_H

#include <iostream>
#include <map>
#include <string>
#include <vector>

namespace dense {

  using std::map;
  using std::ostream;
  using std::string;
  using std::vector;

  // Unsigned integer type used throughout this header for item
  // numbers, tuple counts and intersection statistics.
  typedef unsigned long ulong;

  // Level-wise search for dense itemsets in a data file.
  //
  // An instance is configured once through the constructor and then
  // driven through run().  The protected member functions are the
  // individual steps of the search; they are only declared here and
  // are defined elsewhere.
  //
  // NOTE(review): candidates and istat store raw ulong* arrays, and
  // this class declares neither a destructor nor copy control.
  // Whatever allocates those arrays is presumably also responsible
  // for releasing them, and copying an instance would make both
  // copies refer to the same arrays -- confirm against the
  // implementation before copying or storing instances by value.
  class DenseItemsetSearch {
  protected:
    // Input parameters, stored verbatim by the constructor.
    string inFilename;      // name of the data file to read
    double sigma, delta;    // search thresholds given by the caller

    // Computed from the input: how many tuples the file has
    // (dataSize), and how many lines sigma corresponds to
    // (tupleThreshold).  Both start at 0.
    ulong dataSize, tupleThreshold;

    // Item -> word: words[i] is the word that item number i stands
    // for.  Filled in by scan1.
    vector<string> words;
    // Word -> item: the inverse of words.  Also filled in by scan1.
    map<string, ulong> wordMapping;
    // Called by scan1 to fill in the mappings: returns the item for
    // `word`, either by finding the existing mapping or by creating
    // a new one.
    ulong findOrMakeMapping(const string &word);
    // Next available (not yet assigned) item number; starts at 0.
    ulong nextWord;

    // Number of items in each candidate itemset; set by
    // makeCandidates2 / makeCandidates.  0 until then.
    int level;
    // Candidate itemsets, created by makeCandidates2 /
    // makeCandidates.  One array per candidate.
    vector<ulong*> candidates;
    // Intersection statistics, created by scan1 / scan.
    vector<ulong*> istat;

    // Read the file once, compute the word-item mapping, and record
    // the frequency of each item in istat.
    void scan1();
    // Remove items that can never be part of a dense itemset, and
    // print the (weak) densities of the dense singleton itemsets to
    // `output`.  Renumber the items so that the sufficiently dense
    // ones come first, and fix up istat, words and wordMapping to
    // match the new numbering.
    void pruneAndPrint1(ostream &output);
    // Create the size-2 candidates and their istat entries, using the
    // size of istat as input.  Sets level to 2.
    void makeCandidates2();
    // Re-read the file and fill in istat for the current candidates.
    void scan();
    // Print to `output` the dense candidates, each prefixed by its
    // weak density, and remove the non-dense candidates.
    void pruneAndPrint(ostream &output);
    // Make new candidates from the surviving ones, increasing level
    // by 1.
    void makeCandidates();

  public:
    // Set up a search over the file named `filename` with thresholds
    // `sigma_` and `delta_`.  Only stores the arguments and resets
    // every counter to 0 and every container to empty; the file is
    // not touched here.
    //
    // The filename is taken by const reference so that it is copied
    // exactly once (into inFilename), in line with
    // findOrMakeMapping.
    DenseItemsetSearch(const string &filename, double sigma_, double delta_)
      : inFilename(filename), sigma(sigma_), delta(delta_),
        dataSize(0), tupleThreshold(0), words(), wordMapping(),
        nextWord(0), level(0), candidates(), istat()
      {}

    // Do the search.  `output` is the stream handed to the
    // pruneAndPrint* steps above; `status` is a second stream,
    // presumably for progress or diagnostic messages (its use is not
    // visible in this header -- confirm in the implementation).
    void run(ostream &output, ostream &status);
  };
}

#endif

