SIS
Symmetric Index Structures
|
00001 #ifndef COMPRESSEDAUTOMATONADAPTER_HPP 00002 #define COMPRESSEDAUTOMATONADAPTER_HPP 00003 00004 #include "../cppbase.hpp" 00005 00006 #include <cstdio> 00007 #include <iostream> 00008 #include <fstream> 00009 #include <string> 00010 00011 #include "CompressedAutomatonAdapterInterface.hpp" 00012 #include "CompressedAutomatonDumpingInterface.hpp" 00013 #include "CompressedAutomatonSerializing.hpp" 00014 #include "ManagedStageAutomatonInterface.hpp" 00015 00017 extern "C" void DumpSequenceOfLabels( FILE * fp, const VoidSequence * suffix, UINT encoding ); 00018 extern "C" void DumpLabel( const CompressedAutomaton * aut, FILE * fp, UINT tr, UINT encoding ); 00019 extern "C" void DumpSequenceOfLabels2( FILE * fp, const VoidSequence * suffix, UINT encoding ); 00020 extern "C" void DumpSCDAWGLabel( const SCDAWG * aut, FILE * fp, UINT tr, boolean right, UINT encoding ); 00021 00022 namespace lmu { namespace cis { namespace sis { 00023 00024 class CompressedAutomatonAdapter 00025 : public virtual CompressedAutomatonAdapterInterface 00026 , public virtual CompressedAutomatonDumpingInterface 00027 , public virtual ManagedStageAutomatonInterface 00028 { 00029 00030 protected: 00031 UINT C_symbolsize; // in (machine)-word-width 00032 CompressedAutomaton* C_CompressedAutomaton; 00033 00034 public: 00036 CompressedAutomatonAdapter() : CompressedAutomatonAdapter(1) { 00037 if (DEBUG_LEVEL > 5) std::cerr << "CompressedAutomatonAdapter::CompressedAutomatonAdapter()" << std::endl; 00038 } 00039 00040 CompressedAutomatonAdapter(UINT symbolsize) 00041 : C_symbolsize( symbolsize ) 00042 // , C_CompressedAutomaton( CompressedAutomatonInit(symbolsize) ) 00043 // , ManagedAutomatonStage_(EMPTY) 00044 { 00045 C_CompressedAutomaton = CompressedAutomatonInit(symbolsize); 00046 if (DEBUG_LEVEL > 5) std::cerr << "CompressedAutomatonAdapter::CompressedAutomatonAdapter( UINT symbolSize = " << symbolsize << " )" << std::endl; 00047 } 00048 00049 virtual ~CompressedAutomatonAdapter() { 00050 if (DEBUG_LEVEL > 5) std::cerr << "virtual CompressedAutomatonAdapter::~CompressedAutomatonAdapter()" << std::endl; 00051 if (DEBUG_LEVEL > 5) std::cerr << " C_symbolsize = " << C_symbolsize << std::endl; 00052 CompressedAutomatonFree(C_CompressedAutomaton); 00053 } 00055 00056 protected: 00057 /* * * * * * * * * * * * Any-Closed-State Methods * * * * * * * * * * * */ 00058 virtual void Free(); 00059 virtual PairState GetInitialPairState() const; 00060 00061 public: 00062 virtual inline UINT get_symbol_size() const { return C_symbolsize; } 00063 00064 /* * * * * * * * * * * * Pre-Closed Methods * * * * * * * * * * * */ 00065 virtual void AddState( UINT state ); 00066 virtual void AddTransition( UINT stateFrom, UINT start, UINT stateTo ); 00067 00068 /* * * * * * * * * * * * Pre-/Post-Closed/Closing Methods * * * * * * * * * * * */ 00069 // virtual CompressedAutomaton * Read( FILE * fp ); 00070 // virtual CompressedAutomaton * Read( const S8* fn ); 00071 00072 virtual SinkState Add( const VoidSequence * sharpDocumentDollar ) = 0; 00073 virtual SinkState Add( const VoidSequenceAdapter& sharpDocumentDollar ); 00074 virtual SinkState Add( const S8* documentDollar ); 00075 virtual SinkState Add( const std::string& word ); 00076 virtual void AddDictionary( std::istream& ); 00077 00078 /* * * * * * * * * * * * Post-Closed Methods * * * * * * * * * * * */ 00079 virtual void Shrink(); 00080 00081 virtual void Write( FILE * fp ) const; 00082 virtual void Write( const S8* fn ) const; 00083 00084 virtual void DumpStat( FILE * fp ) const; 00085 virtual void DumpStat( const S8* fn ) const; 00086 virtual void GenerateLanguage( const S8* fn, UINT encoding = ENCODING_PLAIN ) const; 00087 virtual void LeftAutomatonGenerateLanguage( FILE * fp, void (*DumpSequenceOfLabels)(FILE *, const VoidSequence *, UINT), UINT encoding ) const; 00088 virtual void AddTarjanTable(); 00089 // virtual void GenerateLanguage( const S8* fn, void (*DumpSequenceOfLabels)(FILE *, const VoidSequence *, UINT), UINT encoding = ENCODING_PLAIN ) const { FILE* fpOut = Fopen(fn, "wb"); CompressedAutomatonGenerateLanguage( C_CompressedAutomaton, fpOut, DumpSequenceOfLabels, encoding );} 00090 00091 public: 00092 /* * * * * * * * * * * * Post-Closed Methods * * * * * * * * * * * */ 00093 virtual PairState Delta( const PairState * state, const void * symbol ) const; 00094 virtual PairState Delta( const PairState * state, const char symbol ) const; 00095 virtual PairState Delta( const void * symbol ) const; 00096 virtual PairState Delta( const S8* pattern ) const; 00097 00098 virtual void DumpGV( FILE * fp, void (*DumpLabel)(const CompressedAutomaton *, FILE *, UINT, UINT), UINT encoding ); 00099 00100 virtual UINT number_of_states() const; 00101 virtual UINT number_of_transitions() const; 00102 virtual UINT number_of_transitions(State s) const; 00103 00104 virtual inline std::string data_at(UINT pos) const { 00105 char * ptr = (S8*)mVoidSequenceElement( automaton()->data, pos ); 00106 return std::string( 00107 ptr, 1 00108 ); 00109 } 00110 00111 virtual inline UINT data_length() const { return automaton()->data->seqStored; } 00112 // protected: 00113 virtual CompressedAutomaton* automaton() const = 0; 00114 // virtual UINT suffixLink(const UINT&) const = 0; 00115 00116 private: 00118 virtual void Print( std::ostream& os) const; 00119 virtual void ReadAll( std::istream& in); 00120 virtual void ReadOne( std::istream& in); 00121 }; 00122 00123 }}} /* End of namespace lmu::cis::sis */ 00124 00125 #endif /* end of include guard: COMPRESSEDAUTOMATONADAPTER_HPP */