// SIS - Symmetric Index Structures
00001 #ifndef INENAGACDAWGADAPTER_HPP 00002 #define INENAGACDAWGADAPTER_HPP 00003 00004 #include "../cppbase.hpp" 00005 00006 #include <iostream> 00007 #include <fstream> 00008 #include <vector> 00009 #include <map> 00010 #include <utility> 00011 #include <string> 00012 #include <memory> 00013 #include <tuple> 00014 #include <cassert> 00015 00016 // #include "DocumentIndexingCompressedAutomaton.hpp" 00017 #include "CompressedAutomatonAdapter.hpp" 00018 #include "CompressedAutomatonSerializing.hpp" 00019 #include "SerializingAutomaton.hpp" 00020 00021 namespace lmu { namespace cis { namespace sis { 00022 00023 class InenagaCDAWGAdapter 00024 : public virtual CompressedAutomatonAdapter 00025 // , public virtual CompressedAutomatonSerializing 00026 { 00027 protected: 00028 CDAWGBuildHelp* C_CDAWGBuildHelp; 00029 00030 public: 00031 00033 InenagaCDAWGAdapter() : InenagaCDAWGAdapter(1) { 00034 if (DEBUG_LEVEL > 5) std::cerr << "InenagaCDAWGAdapter::InenagaCDAWGAdapter( UINT symbolsize = " << C_symbolsize << " )" << std::endl; 00035 } 00036 00037 InenagaCDAWGAdapter(UINT symbolsize) 00038 : CompressedAutomatonAdapter(symbolsize) 00039 , C_CDAWGBuildHelp( CDAWGBuildHelpInit( this->C_CompressedAutomaton ) ) { 00040 if (DEBUG_LEVEL > 5) std::cerr << "InenagaCDAWGAdapter::InenagaCDAWGAdapter( UINT symbolsize = " << symbolsize << " )" << std::endl; 00041 } 00042 00043 virtual ~InenagaCDAWGAdapter() { 00044 if (DEBUG_LEVEL > 5) std::cerr << "virtual InenagaCDAWGAdapter::~InenagaCDAWGAdapter( )" << std::endl; 00045 // CDAWGBuildHelpFree( C_CDAWGBuildHelp ); //WARN! Memory leak?? 
00046 } 00047 00048 InenagaCDAWGAdapter(const InenagaCDAWGAdapter& rhs) = delete; 00049 InenagaCDAWGAdapter& operator=(const InenagaCDAWGAdapter& rhs) = delete; 00050 00052 00053 00055 00056 virtual SinkState Add( const VoidSequence * documentDollar ); 00057 00063 virtual void DumpGV( const S8 * fn, UINT encoding = ENCODING_PLAIN ) const; 00065 00069 virtual void Close(); 00070 virtual void SortTransitions(); 00071 virtual void SortTransitions( UINTSequence * _transitionsFrom ); 00073 00074 virtual inline CompressedAutomaton * Read( FILE * fp ) { 00075 std::cout << "CompressedAutomaton * InenagaCDAWGAdapter::Read( FILE * fp )" << std::endl; 00076 00077 C_CompressedAutomaton = CompressedAutomatonRead( fp ); 00078 00079 // assert(C_CompressedAutomaton != NO); 00080 00081 ManagedAutomatonStage_ = CLOSED; 00082 00083 return C_CompressedAutomaton; 00084 } 00085 virtual inline CompressedAutomaton * Read(const S8* filename) { 00086 std::cout << "CompressedAutomaton * InenagaCDAWGAdapter::Read( const S8* fn )" << std::endl; 00087 00088 00089 FILE* fpIn = Fopen(filename, "rb"); 00090 Read( fpIn ); 00091 Fclose(fpIn); 00092 00093 ManagedAutomatonStage_ = CLOSED; 00094 00095 return automaton(); 00096 } 00097 00098 00102 00103 00109 00110 00111 00112 00116 // virtual void Add( std::ifstream& in) { 00117 // std::string tmp; 00118 // in >> tmp; 00119 // Add(const_cast<std::string&>(tmp)); 00120 // } 00122 00123 virtual std::string DumpStat() const; 00124 virtual void DumpStat(const S8* outfile) const; 00125 00132 // virtual UINT number_of_documents() { return automaton()->statesTransitions->seqStored; } 00133 00134 virtual CompressedAutomaton* automaton() const; 00135 00136 00138 virtual inline CDAWGBuildHelp* buildhelp() const { 00139 assert(ManagedAutomatonStage_ < CLOSED); 00140 return C_CDAWGBuildHelp; 00141 } 00142 00143 00149 virtual inline 00150 UINT 00151 suffixLink( const UINT& state ) const 00152 { 00153 assert(ManagedAutomatonStage_ < CLOSED); 00154 00155 return 
buildhelp()->statesSuffixLink->seq[state]; 00156 } 00157 00158 virtual inline UINT printsymbolsize() const { return automaton()->data->elementSize; } 00159 00160 }; 00161 00162 }}} /* End of namespace lmu::cis::sis */ 00163 00164 #endif /* end of include guard: INENAGACDAWGADAPTER_HPP */ 00165