00001 #ifndef BIT_COMPRESSEDARRAY_HH
00002 #define BIT_COMPRESSEDARRAY_HH
00003
00004 #include "bit/algo.hh"
00005 #include "bit/Array.hh"
00006 #include "str/str.hh"
00007
00008 namespace bit {
00009
00021 class CompressedArray {
00022 public:
00023
00028 static const u64 constant_size_cost = 5;
00029
00030 CompressedArray()
00031 : m_inv_array(NULL), m_shift(0)
00032 {
00033 }
00034
00035 CompressedArray(const Array &array)
00036 : m_array(array), m_inv_array(NULL), m_shift(0)
00037 {
00038 }
00039
00040 CompressedArray(const CompressedArray &array)
00041 : m_array(array.m_array), m_inv_array(array.m_inv_array),
00042 m_shift(array.m_shift)
00043 {
00044 if (m_inv_array != NULL)
00045 m_inv_array = new CompressedArray(*m_inv_array);
00046 }
00047
00055 CompressedArray(u64 num_elems, unsigned int bits_per_elem)
00056 : m_array(num_elems, bits_per_elem), m_inv_array(NULL), m_shift(0)
00057 {
00058 }
00059
00060 ~CompressedArray()
00061 {
00062 delete m_inv_array;
00063 }
00064
00065 CompressedArray &operator=(const CompressedArray &array)
00066 {
00067 if (&array == this)
00068 return *this;
00069 m_array = array.m_array;
00070
00071 if (m_inv_array != NULL) {
00072 assert(m_inv_array != array.m_inv_array);
00073 delete m_inv_array;
00074 }
00075
00076 m_inv_array = array.m_inv_array;
00077 if (m_inv_array != NULL)
00078 m_inv_array = new CompressedArray(*m_inv_array);
00079 m_shift = array.m_shift;
00080
00081 return *this;
00082 }
00083
00085 bool is_compressed() const {
00086 return m_inv_array != NULL;
00087 }
00088
00090 u64 num_elems() const {
00091 return m_array.num_elems();
00092 }
00093
00097 unsigned int shift() const {
00098 if (!m_inv_array)
00099 throw bit::invalid_call(
00100 "bit::CompressedArray::shift() not compressed");
00101 return m_shift;
00102 }
00103
00107 u64 compressed_size() const {
00108 u64 size = m_array.data_len();
00109 if (m_inv_array)
00110 size += m_inv_array->compressed_size() + constant_size_cost;
00111 return size;
00112 }
00113
00120 void resize(u64 num_elems)
00121 {
00122 if (m_inv_array)
00123 throw bit::invalid_call(
00124 "bit::CompressedArray::resize() called after compression");
00125 m_array.resize(num_elems);
00126 }
00127
00135 void set_width(int bits_per_elem)
00136 {
00137 if (m_inv_array)
00138 throw bit::invalid_call(
00139 "bit::CompressedArray::set() called after compression");
00140 m_array.set_width(bits_per_elem);
00141 }
00142
00152 void set(u64 elem, u32 value)
00153 {
00154 if (m_inv_array)
00155 throw bit::invalid_call(
00156 "bit::CompressedArray::set() called after compression");
00157 m_array.set(elem, value);
00158 }
00159
00175 void set_grow(u64 elem, u32 value)
00176 {
00177 if (m_inv_array)
00178 throw bit::invalid_call(
00179 "bit::CompressedArray::set_grow() called after compression");
00180 m_array.set_grow(elem, value);
00181 }
00182
00191 void set_grow_widen(u64 elem, u32 value)
00192 {
00193 if (m_inv_array)
00194 throw bit::invalid_call(
00195 "bit::CompressedArray::set_grow_widen() called after compression");
00196 m_array.set_grow_widen(elem, value);
00197 }
00198
00204 u32 get(u64 elem) const
00205 {
00206 if (elem >= m_array.num_elems())
00207 throw bit::out_of_range("bit::Array::get(): out of range");
00208 if (!m_inv_array)
00209 return m_array.get(elem);
00210
00211
00212 u64 inv = m_inv_array->last_leq(elem);
00213 assert(inv < max_u64);
00214 return inv << m_shift | m_array.get(elem);
00215 }
00216
00227 void compress(unsigned int shift)
00228 {
00229 if (m_inv_array)
00230 throw bit::invalid_call(
00231 "bit::CompressedArray::compress() already compressed");
00232
00233 if (shift >= m_array.bits_per_elem())
00234 throw bit::invalid_argument(
00235 "bit::CompressedArray::compress() shift too large");
00236
00237 if (m_array.num_elems() > 0xffffffff)
00238 throw bit::out_of_range(
00239 "bit::CompressedArray::compress() too many elements");
00240
00241 if (shift == m_array.bits_per_elem())
00242 return;
00243 if (m_array.num_elems() == 0)
00244 return;
00245
00246
00247
00248
00249
00250 {
00251 Array new_array(m_array);
00252 u32 prev = new_array.get(0);
00253 for (u64 i = 0; i < new_array.num_elems(); i++) {
00254 u32 value = new_array.get(i);
00255 if (value < prev)
00256 throw bit::invalid_argument(
00257 "bit::CompressedArray::compress() array not sorted");
00258 prev = value;
00259 if (shift > 0)
00260 new_array.set(i, value >> shift);
00261 }
00262 m_inv_array = new CompressedArray(inverse(new_array));
00263 }
00264
00265 m_array = mask(m_array, shift);
00266 m_shift = shift;
00267 }
00268
00274 void optimal_compress()
00275 {
00276 if (m_inv_array)
00277 throw bit::invalid_call(
00278 "bit::CompressedArray::compress() already compressed");
00279
00280 unsigned int best_shift = (unsigned int)-1;
00281 u64 best_size = compressed_size();
00282 for (unsigned int shift = 0; shift < m_array.bits_per_elem(); shift++) {
00283 CompressedArray array(*this);
00284 array.compress(shift);
00285 if (array.compressed_size() < best_size) {
00286 best_shift = shift;
00287 best_size = array.compressed_size();
00288 }
00289 }
00290
00291 if (best_shift == (unsigned int)-1)
00292 return;
00293
00294 if (best_shift < m_array.bits_per_elem())
00295 compress(best_shift);
00296 }
00297
00304 void recursive_optimal_compress()
00305 {
00306 optimal_compress();
00307 if (is_compressed())
00308 m_inv_array->recursive_optimal_compress();
00309 }
00310
00313 void uncompress()
00314 {
00315 if (!m_inv_array)
00316 return;
00317
00318 Array new_array;
00319 for (u64 i = 0; i < m_array.num_elems(); i++)
00320 new_array.set_grow_widen(i, get(i));
00321 delete m_inv_array;
00322 m_inv_array = NULL;
00323 m_array = new_array;
00324 m_shift = 0;
00325 }
00326
00336 u64 last_leq(u32 value)
00337 {
00338 if (!m_inv_array)
00339 return bit::last_leq(m_array, value);
00340
00341 u32 s = value >> m_shift;
00342
00343 if (s >= m_inv_array->num_elems()) {
00344 if (m_array.num_elems() == 0)
00345 return max_u64;
00346 return m_array.num_elems() - 1;
00347 }
00348
00349 u64 first = m_inv_array->get(s);
00350 u64 limit = m_array.num_elems();
00351 if (s + 1 < m_inv_array->num_elems())
00352 limit = m_inv_array->get(s + 1);
00353 assert(first <= limit);
00354 u64 last_leq = bit::last_leq(m_array, value & one_masks[m_shift],
00355 first, limit);
00356 if (last_leq == max_u64) {
00357 if (first == 0)
00358 return max_u64;
00359 return first - 1;
00360 }
00361
00362 return last_leq;
00363 }
00364
00365 template <class A>
00366 static A inverse(const A &array)
00367 {
00368 A inv;
00369 for (int j = 0;; j++) {
00370 u64 bound = bit::lower_bound(array, j, 0, array.num_elems());
00371 if (bound == array.num_elems())
00372 break;
00373 inv.set_grow_widen(inv.num_elems(), bound);
00374 }
00375 return inv;
00376 }
00377
00378 template <class A>
00379 static A shift_array(const A &array, int k)
00380 {
00381 A ret(array);
00382 if (k > 0) {
00383 for (u64 i = 0; i < array.num_elems(); i++)
00384 ret.set(i, ret.get(i) >> k);
00385 }
00386 ret.set_width(highest_bit(max(ret)));
00387 return ret;
00388 }
00389
00390 template <class A>
00391 static A mask(const A &array, unsigned int k)
00392 {
00393 assert(k <= 32);
00394 int mask = one_masks[k];
00395 A ret(array);
00396 for (u64 i = 0; i < array.num_elems(); i++)
00397 ret.set(i, ret.get(i) & mask);
00398 ret.set_width(highest_bit(max(ret)));
00399 return ret;
00400 }
00401
00406 std::string debug_str(int indent = 0) const
00407 {
00408 u64 i = 0;
00409 bool a_end = false;
00410 bool i_end = false;
00411 std::string ret;
00412 ret.append(indent, ' ');
00413 ret.append(str::fmt(256, "m_shift = %d\n", m_shift));
00414 ret.append(indent, ' ');
00415 ret.append("i\tarray\tinv\n");
00416 while (1) {
00417
00418 if (i >= m_array.num_elems())
00419 a_end = true;
00420 if (!m_inv_array || i >= m_inv_array->num_elems())
00421 i_end = true;
00422
00423 if (a_end && i_end)
00424 break;
00425
00426 ret.append(indent, ' ');
00427 ret.append(str::fmt(64, "%d:\t", i));
00428 if (!a_end)
00429 ret.append(str::fmt(64, "%d", m_array.get(i)));
00430 ret.append("\t");
00431 if (!i_end)
00432 ret.append(str::fmt(64, "%d", m_inv_array->get(i)));
00433 ret.append("\n");
00434
00435 i++;
00436 }
00437 ret.append(indent, ' ');
00438 ret.append(str::fmt(256, "compressed size: %d bytes\n",
00439 compressed_size()));
00440
00441 if (m_inv_array)
00442 ret.append(m_inv_array->debug_str(indent + 2));
00443
00444 return ret;
00445 }
00446
00451 void write(FILE *file) const
00452 {
00453 fputs("CARRAY1:", file);
00454 try {
00455 m_array.write(file);
00456 }
00457 catch (bit::io_error &e) {
00458 throw bit::io_error(
00459 std::string("bit::CompressedArray::write() write failed") +
00460 e.what());
00461 }
00462 if (m_inv_array) {
00463 fputs("+", file);
00464 fprintf(file, "%d:", m_shift);
00465 m_inv_array->write(file);
00466 }
00467 else {
00468 fputs("-", file);
00469 }
00470 }
00471
00478 void read(FILE *file)
00479 {
00480 int version;
00481 int ret = fscanf(file, "CARRAY%d:", &version);
00482 if (ret != 1 || version != 1)
00483 throw bit::io_error(
00484 "bit::CompressedArray::read() error while reading header");
00485 try {
00486 m_array.read(file);
00487 }
00488 catch (bit::io_error &e) {
00489 throw bit::io_error(
00490 std::string("bit::CompressedArray::read() read failed: ")
00491 + e.what());
00492 }
00493 int c = fgetc(file);
00494 if (c == '-') {
00495 delete m_inv_array;
00496 m_inv_array = NULL;
00497 m_shift = 0;
00498 }
00499 else if (c == '+') {
00500 delete m_inv_array;
00501 m_inv_array = new CompressedArray();
00502 ret = fscanf(file, "%d:", &m_shift);
00503 if (ret != 1 || m_shift >= max_bits_per_value)
00504 throw bit::io_error(
00505 "bit::CompressedArray::read() file format error: invalid shift");
00506 m_inv_array->read(file);
00507 }
00508 else
00509 throw bit::io_error("bit::CompressedArray::read() file format error");
00510 }
00511
00512 private:
00513
00515 Array m_array;
00516
00520 CompressedArray *m_inv_array;
00521
00523 unsigned int m_shift;
00524
00525 };
00526
00527 };
00528
00529 #endif