CompressedArray.hh

Go to the documentation of this file.
00001 #ifndef BIT_COMPRESSEDARRAY_HH
00002 #define BIT_COMPRESSEDARRAY_HH
00003 
00004 #include "bit/algo.hh"
00005 #include "bit/Array.hh"
00006 #include "str/str.hh"
00007 
00008 namespace bit {
00009 
00021   class CompressedArray {
00022   public:
00023   
00028     static const u64 constant_size_cost = 5;
00029 
00030     CompressedArray()
00031       : m_inv_array(NULL), m_shift(0)
00032     {
00033     }
00034 
00035     CompressedArray(const Array &array)
00036       : m_array(array), m_inv_array(NULL), m_shift(0)
00037     {
00038     }
00039 
00040     CompressedArray(const CompressedArray &array)
00041       : m_array(array.m_array), m_inv_array(array.m_inv_array), 
00042         m_shift(array.m_shift)
00043     {
00044       if (m_inv_array != NULL)
00045         m_inv_array = new CompressedArray(*m_inv_array);
00046     }
00047 
00055     CompressedArray(u64 num_elems, unsigned int bits_per_elem)
00056       : m_array(num_elems, bits_per_elem), m_inv_array(NULL), m_shift(0)
00057     {
00058     }
00059 
00060     ~CompressedArray()
00061     {
00062       delete m_inv_array;
00063     }
00064 
00065     CompressedArray &operator=(const CompressedArray &array)
00066     {
00067       if (&array == this)
00068         return *this;
00069       m_array = array.m_array;
00070 
00071       if (m_inv_array != NULL) {
00072         assert(m_inv_array != array.m_inv_array);
00073         delete m_inv_array;
00074       }
00075 
00076       m_inv_array = array.m_inv_array;
00077       if (m_inv_array != NULL)
00078         m_inv_array = new CompressedArray(*m_inv_array);
00079       m_shift = array.m_shift;
00080 
00081       return *this;
00082     }
00083 
00085     bool is_compressed() const {
00086       return m_inv_array != NULL;
00087     }
00088 
00090     u64 num_elems() const { 
00091       return m_array.num_elems();
00092     }
00093 
00097     unsigned int shift() const {
00098       if (!m_inv_array)
00099         throw bit::invalid_call(
00100           "bit::CompressedArray::shift() not compressed");
00101       return m_shift;
00102     }
00103 
00107     u64 compressed_size() const {
00108       u64 size = m_array.data_len();
00109       if (m_inv_array)
00110         size += m_inv_array->compressed_size() + constant_size_cost;
00111       return size;
00112     }
00113 
00120     void resize(u64 num_elems)
00121     {
00122       if (m_inv_array)
00123         throw bit::invalid_call(
00124           "bit::CompressedArray::resize() called after compression");
00125       m_array.resize(num_elems);
00126     }
00127      
00135     void set_width(int bits_per_elem)
00136     {
00137       if (m_inv_array)
00138         throw bit::invalid_call(
00139           "bit::CompressedArray::set() called after compression");
00140       m_array.set_width(bits_per_elem);
00141     }    
00142 
00152     void set(u64 elem, u32 value)
00153     {
00154       if (m_inv_array)
00155         throw bit::invalid_call(
00156           "bit::CompressedArray::set() called after compression");
00157       m_array.set(elem, value);
00158     }
00159 
00175     void set_grow(u64 elem, u32 value)
00176     {
00177       if (m_inv_array)
00178         throw bit::invalid_call(
00179           "bit::CompressedArray::set_grow() called after compression");
00180       m_array.set_grow(elem, value);
00181     }
00182 
00191     void set_grow_widen(u64 elem, u32 value)
00192     {
00193       if (m_inv_array)
00194         throw bit::invalid_call(
00195           "bit::CompressedArray::set_grow_widen() called after compression");
00196       m_array.set_grow_widen(elem, value);
00197     }
00198 
00204     u32 get(u64 elem) const
00205     {
00206       if (elem >= m_array.num_elems())
00207         throw bit::out_of_range("bit::Array::get(): out of range");
00208       if (!m_inv_array)
00209         return m_array.get(elem);
00210 
00211 //      u64 inv = bit::last_leq(*m_inv_array, elem, 0, m_inv_array->num_elems());
00212       u64 inv = m_inv_array->last_leq(elem);
00213       assert(inv < max_u64);
00214       return inv << m_shift | m_array.get(elem);
00215     }
00216 
00227     void compress(unsigned int shift)
00228     {
00229       if (m_inv_array)
00230         throw bit::invalid_call(
00231           "bit::CompressedArray::compress() already compressed");
00232 
00233       if (shift >= m_array.bits_per_elem())
00234         throw bit::invalid_argument(
00235           "bit::CompressedArray::compress() shift too large");
00236 
00237       if (m_array.num_elems() > 0xffffffff)
00238         throw bit::out_of_range(
00239           "bit::CompressedArray::compress() too many elements");
00240 
00241       if (shift == m_array.bits_per_elem())
00242         return;
00243       if (m_array.num_elems() == 0)
00244         return;
00245 
00246 //      m_inv_array = new CompressedArray(inverse(shift_array(m_array, shift)));
00247 
00248       // Create the inverse array
00249       //
00250       {
00251         Array new_array(m_array);
00252         u32 prev = new_array.get(0);
00253         for (u64 i = 0; i < new_array.num_elems(); i++) {
00254           u32 value = new_array.get(i);
00255           if (value < prev)
00256             throw bit::invalid_argument(
00257               "bit::CompressedArray::compress() array not sorted");
00258           prev = value;
00259           if (shift > 0)
00260             new_array.set(i, value >> shift);
00261         }
00262         m_inv_array = new CompressedArray(inverse(new_array));
00263       }
00264 
00265       m_array = mask(m_array, shift);
00266       m_shift = shift;
00267     }
00268 
00274     void optimal_compress()
00275     {
00276       if (m_inv_array)
00277         throw bit::invalid_call(
00278           "bit::CompressedArray::compress() already compressed");
00279 
00280       unsigned int best_shift = (unsigned int)-1;
00281       u64 best_size = compressed_size();
00282       for (unsigned int shift = 0; shift < m_array.bits_per_elem(); shift++) {
00283         CompressedArray array(*this);
00284         array.compress(shift);
00285         if (array.compressed_size() < best_size) {
00286           best_shift = shift;
00287           best_size = array.compressed_size();
00288         }
00289       }
00290 
00291       if (best_shift == (unsigned int)-1)
00292         return;
00293 
00294       if (best_shift < m_array.bits_per_elem())
00295         compress(best_shift);
00296     }
00297 
00304     void recursive_optimal_compress()
00305     {
00306       optimal_compress();
00307       if (is_compressed())
00308         m_inv_array->recursive_optimal_compress();
00309     }
00310 
00313     void uncompress()
00314     {
00315       if (!m_inv_array)
00316         return;
00317 
00318       Array new_array;
00319       for (u64 i = 0; i < m_array.num_elems(); i++)
00320         new_array.set_grow_widen(i, get(i));
00321       delete m_inv_array;
00322       m_inv_array = NULL;
00323       m_array = new_array;
00324       m_shift = 0;
00325     }
00326 
00336     u64 last_leq(u32 value)
00337     {
00338       if (!m_inv_array)
00339         return bit::last_leq(m_array, value);
00340 
00341       u32 s = value >> m_shift;
00342 
00343       if (s >= m_inv_array->num_elems()) {
00344         if (m_array.num_elems() == 0)
00345           return max_u64;
00346         return m_array.num_elems() - 1;
00347       }
00348 
00349       u64 first = m_inv_array->get(s);
00350       u64 limit = m_array.num_elems();
00351       if (s + 1 < m_inv_array->num_elems()) 
00352         limit = m_inv_array->get(s + 1);
00353       assert(first <= limit);
00354       u64 last_leq = bit::last_leq(m_array, value & one_masks[m_shift],
00355                                    first, limit);
00356       if (last_leq == max_u64) {
00357         if (first == 0)
00358           return max_u64;
00359         return first - 1;
00360       }
00361   
00362       return last_leq;
00363     }
00364 
00365     template <class A>
00366     static A inverse(const A &array)
00367     {
00368       A inv;
00369       for (int j = 0;; j++) {
00370         u64 bound = bit::lower_bound(array, j, 0, array.num_elems());
00371         if (bound == array.num_elems())
00372           break;
00373         inv.set_grow_widen(inv.num_elems(), bound);
00374       }
00375       return inv;
00376     }
00377 
00378     template <class A>
00379     static A shift_array(const A &array, int k)
00380     {
00381       A ret(array);
00382       if (k > 0) {
00383         for (u64 i = 0; i < array.num_elems(); i++)
00384           ret.set(i, ret.get(i) >> k);
00385       }
00386       ret.set_width(highest_bit(max(ret)));
00387       return ret;
00388     }
00389 
00390     template <class A>
00391     static A mask(const A &array, unsigned int k)
00392     {
00393       assert(k <= 32);
00394       int mask = one_masks[k];
00395       A ret(array);
00396       for (u64 i = 0; i < array.num_elems(); i++)
00397         ret.set(i, ret.get(i) & mask);
00398       ret.set_width(highest_bit(max(ret)));
00399       return ret;
00400     }
00401 
00406     std::string debug_str(int indent = 0) const
00407     {
00408       u64 i = 0;
00409       bool a_end = false;
00410       bool i_end = false;
00411       std::string ret;
00412       ret.append(indent, ' ');
00413       ret.append(str::fmt(256, "m_shift = %d\n", m_shift));
00414       ret.append(indent, ' ');
00415       ret.append("i\tarray\tinv\n");
00416       while (1) {
00417         
00418         if (i >= m_array.num_elems())
00419           a_end = true;
00420         if (!m_inv_array || i >= m_inv_array->num_elems())
00421           i_end = true;
00422         
00423         if (a_end && i_end)
00424           break;
00425 
00426         ret.append(indent, ' ');
00427         ret.append(str::fmt(64, "%d:\t", i));
00428         if (!a_end)
00429           ret.append(str::fmt(64, "%d", m_array.get(i)));
00430         ret.append("\t");
00431         if (!i_end)
00432           ret.append(str::fmt(64, "%d", m_inv_array->get(i)));
00433         ret.append("\n");
00434 
00435         i++;
00436       }
00437       ret.append(indent, ' ');
00438       ret.append(str::fmt(256, "compressed size: %d bytes\n", 
00439                           compressed_size()));
00440       
00441       if (m_inv_array)
00442         ret.append(m_inv_array->debug_str(indent + 2));
00443 
00444       return ret;
00445     }
00446 
00451     void write(FILE *file) const
00452     {
00453       fputs("CARRAY1:", file);
00454       try {
00455         m_array.write(file);
00456       }
00457       catch (bit::io_error &e) {
00458         throw bit::io_error(
00459           std::string("bit::CompressedArray::write() write failed") +
00460           e.what());
00461       }
00462       if (m_inv_array) {
00463         fputs("+", file);
00464         fprintf(file, "%d:", m_shift);
00465         m_inv_array->write(file);
00466       }
00467       else {
00468         fputs("-", file);
00469       }
00470     }
00471 
00478     void read(FILE *file)
00479     {
00480       int version;
00481       int ret = fscanf(file, "CARRAY%d:", &version);
00482       if (ret != 1 || version != 1)
00483         throw bit::io_error(
00484           "bit::CompressedArray::read() error while reading header");
00485       try {
00486         m_array.read(file);
00487       }
00488       catch (bit::io_error &e) {
00489         throw bit::io_error(
00490           std::string("bit::CompressedArray::read() read failed: ") 
00491           + e.what());
00492       }
00493       int c = fgetc(file);
00494       if (c == '-') {
00495         delete m_inv_array;
00496         m_inv_array = NULL;
00497         m_shift = 0;
00498       }
00499       else if (c == '+') {
00500         delete m_inv_array;
00501         m_inv_array = new CompressedArray();
00502         ret = fscanf(file, "%d:", &m_shift);
00503         if (ret != 1 || m_shift >= max_bits_per_value)
00504           throw bit::io_error(
00505             "bit::CompressedArray::read() file format error: invalid shift");
00506         m_inv_array->read(file);
00507       }
00508       else
00509         throw bit::io_error("bit::CompressedArray::read() file format error");
00510     }    
00511 
00512   private:
00513 
00515     Array m_array;
00516     
00520     CompressedArray *m_inv_array;
00521 
00523     unsigned int m_shift;
00524 
00525   };
00526 
00527 };
00528 
00529 #endif /* BIT_COMPRESSEDARRAY_HH */

Generated on Mon Jan 8 15:51:03 2007 for bit by  doxygen 1.4.6