00001 #ifndef ARPAREADER_HH 00002 #define ARPAREADER_HH 00003 00004 #include "SymbolMap.hh" 00005 00006 namespace bit { 00007 00018 class ArpaReader { 00019 public: 00021 ArpaReader(FILE *file = NULL); 00022 00024 void reset(FILE *file); 00025 00027 void read_header(); 00028 00035 bool read_ngram(); 00036 00040 bool read_order_ngrams(bool sort = false); 00041 00043 struct Options { 00046 bool throw_header_mismatch; 00047 00049 bool show_progress; 00050 00052 std::vector<std::string> ignore_symbols; 00053 } opt; 00054 00056 struct Header { 00057 int order; 00058 int num_ngrams_total; 00059 00061 std::vector<int> num_ngrams; 00062 } header; 00063 00065 struct Ngram { 00066 std::vector<int> symbols; 00067 float log_prob; 00068 float backoff; 00069 bool operator<(const Ngram &ngram) const 00070 { 00071 size_t i = 0; 00072 while (1) { 00073 if (i == symbols.size() && i == ngram.symbols.size()) 00074 return false; 00075 if (i == symbols.size()) 00076 return true; 00077 if (i == ngram.symbols.size()) 00078 return false; 00079 if (symbols[i] < ngram.symbols[i]) 00080 return true; 00081 if (ngram.symbols[i] < symbols[i]) 00082 return false; 00083 i++; 00084 } 00085 } 00086 } ngram; 00087 00088 std::vector<Ngram> order_ngrams; 00089 std::vector<int> sorted_order; 00090 00091 SymbolMap<std::string, int> symbol_map; 00092 bool end_reached; 00093 00094 private: 00095 FILE *m_file; 00096 int m_current_order; 00097 int m_ngrams_read; 00098 }; 00099 00100 }; 00101 00102 #endif /* ARPAREADER_HH */