SIS
Symmetric Index Structures
/Users/dbr/ma/src/lmu/cis/sis/adapter/CompressedAutomatonAdapter.hpp
Go to the documentation of this file.
00001 #ifndef COMPRESSEDAUTOMATONADAPTER_HPP
00002 #define COMPRESSEDAUTOMATONADAPTER_HPP
00003 
00004 #include "../cppbase.hpp"
00005 
00006 #include <cstdio>
00007 #include <iostream>
00008 #include <fstream>
00009 #include <string>
00010 
00011 #include "CompressedAutomatonAdapterInterface.hpp"
00012 #include "CompressedAutomatonDumpingInterface.hpp"
00013 #include "CompressedAutomatonSerializing.hpp"
00014 #include "ManagedStageAutomatonInterface.hpp"
00015 
00017 extern "C" void DumpSequenceOfLabels( FILE * fp, const VoidSequence * suffix, UINT encoding );  // C-linkage dump helpers (this line and the next three); only declared here, bodies are not in this header
      // The signature above is the callback type taken by LeftAutomatonGenerateLanguage() further down.
      // The signature below is the callback type taken by DumpGV() further down.
00018 extern "C" void DumpLabel( const CompressedAutomaton * aut, FILE * fp, UINT tr, UINT encoding );
      // Same parameter list as DumpSequenceOfLabels; how the two differ is not visible from this header.
00019 extern "C" void DumpSequenceOfLabels2( FILE * fp, const VoidSequence * suffix, UINT encoding );
      // Counterpart of DumpLabel that takes an SCDAWG and an additional `right` flag.
      // NOTE(review): the meaning of `right` is not documented here - confirm against the definition.
00020 extern "C" void DumpSCDAWGLabel( const SCDAWG * aut, FILE * fp, UINT tr, boolean right, UINT encoding );
00021 
00022 namespace lmu { namespace cis { namespace sis {
00023 
00024 class CompressedAutomatonAdapter   // C++ wrapper around a CompressedAutomaton handle; overview just inside the braces
00025     : public virtual CompressedAutomatonAdapterInterface
00026     , public virtual CompressedAutomatonDumpingInterface
00027     , public virtual ManagedStageAutomatonInterface
00028 {
      // Overview
      //   Stores a CompressedAutomaton* obtained from CompressedAutomatonInit()
      //   and passes it to CompressedAutomatonFree() in the destructor, together
      //   with the symbol size given at construction.
      //
      //   The class is abstract: Add( const VoidSequence * ) and automaton() are
      //   pure virtual. Only the constructors, the destructor, get_symbol_size(),
      //   data_at() and data_length() have bodies in this header; every other
      //   member is declared here and defined elsewhere.
      //
      //   NOTE(review): no copy constructor or copy assignment is declared in
      //   this class, so the implicitly generated ones would copy
      //   C_CompressedAutomaton as a plain pointer and each copy's destructor
      //   would hand the same pointer to CompressedAutomatonFree(). Confirm
      //   whether derived objects are ever copied; if they can be, the copy
      //   operations should be deleted or made to deep-copy.
00029 
00030 protected:
00031     UINT                    C_symbolsize;           // in (machine)-word-width
          // Handle returned by CompressedAutomatonInit(); released in the destructor.
00032     CompressedAutomaton*    C_CompressedAutomaton;
00033 
00034 public:
          // Default constructor: delegates to the UINT constructor with a symbol size of 1.
          // Because of the delegation, the UINT constructor's body (including its
          // trace line) runs first and this body's trace line is printed afterwards.
00036     CompressedAutomatonAdapter() : CompressedAutomatonAdapter(1) {
00037         if (DEBUG_LEVEL > 5) std::cerr << "CompressedAutomatonAdapter::CompressedAutomatonAdapter()" << std::endl;
00038     }
00039 
          // Stores `symbolsize` and creates the underlying automaton with
          // CompressedAutomatonInit(symbolsize).
          // NOTE(review): this single-argument constructor is not `explicit`.
00040     CompressedAutomatonAdapter(UINT symbolsize)
00041         : C_symbolsize( symbolsize )
              // The two initializers below are disabled; the automaton is assigned
              // in the constructor body instead. The reason is not recorded here.
00042         // , C_CompressedAutomaton( CompressedAutomatonInit(symbolsize) )
00043         // , ManagedAutomatonStage_(EMPTY)
00044     {
00045         C_CompressedAutomaton = CompressedAutomatonInit(symbolsize);
              // Trace output is emitted only when DEBUG_LEVEL is above 5.
00046         if (DEBUG_LEVEL > 5) std::cerr << "CompressedAutomatonAdapter::CompressedAutomatonAdapter( UINT symbolSize = " << symbolsize << " )" << std::endl;
00047     }
00048 
          // Prints two trace lines when DEBUG_LEVEL is above 5, then passes
          // C_CompressedAutomaton to CompressedAutomatonFree() unconditionally.
          // NOTE(review): a separate Free() member is declared below; how it
          // interacts with this unconditional release is not visible in this header.
00049     virtual ~CompressedAutomatonAdapter() {
00050         if (DEBUG_LEVEL > 5) std::cerr << "virtual CompressedAutomatonAdapter::~CompressedAutomatonAdapter()" << std::endl;
00051         if (DEBUG_LEVEL > 5) std::cerr << " C_symbolsize = " << C_symbolsize << std::endl;
00052         CompressedAutomatonFree(C_CompressedAutomaton);
00053     }
00055 
00056 protected:
00057     /* * * * * * * * * * * *  Any-Closed-State Methods   * * * * * * * * * * * */
          // Declared only; accessible to derived classes but not to outside callers.
00058     virtual void Free();
00059     virtual PairState GetInitialPairState() const;
00060 
00061 public:
          // Returns the symbol size that was passed to the constructor.
00062     virtual inline UINT get_symbol_size() const { return C_symbolsize; }
00063     
00064     /* * * * * * * * * * * *  Pre-Closed Methods   * * * * * * * * * * * */
00065     virtual void AddState( UINT state );
00066     virtual void AddTransition( UINT stateFrom, UINT start, UINT stateTo );
00067 
00068     /* * * * * * * * * * * *  Pre-/Post-Closed/Closing Methods   * * * * * * * * * * * */
          // The two Read overloads below are disabled.
00069     // virtual CompressedAutomaton * Read( FILE * fp );
00070     // virtual CompressedAutomaton * Read( const S8* fn );
00071 
          // Pure virtual: every concrete subclass has to supply this overload.
          // The remaining Add overloads are ordinary virtuals defined elsewhere;
          // presumably they forward to this one - confirm in the implementation.
00072     virtual SinkState Add( const VoidSequence * sharpDocumentDollar ) = 0;
00073     virtual SinkState Add( const VoidSequenceAdapter& sharpDocumentDollar );
00074     virtual SinkState Add( const S8* documentDollar );
00075     virtual SinkState Add( const std::string& word );
00076     virtual void AddDictionary( std::istream& );
00077 
00078     /* * * * * * * * * * * *  Post-Closed Methods   * * * * * * * * * * * */
00079     virtual void Shrink();
00080 
          // Write and DumpStat each come in a FILE* flavour and an S8* flavour
          // (parameter named `fn`, presumably a file name).
00081     virtual void Write( FILE * fp ) const;
00082     virtual void Write( const S8* fn ) const;
00083 
00084     virtual void DumpStat( FILE * fp ) const;
00085     virtual void DumpStat( const S8* fn ) const;
          // NOTE(review): default arguments of a virtual function are chosen from
          // the static type at the call site; an override that declares a different
          // default would not be picked up through a base pointer or reference.
00086     virtual void GenerateLanguage( const S8* fn, UINT encoding = ENCODING_PLAIN ) const;
          // The function-pointer parameter is named DumpSequenceOfLabels, which
          // hides the extern "C" function of the same name inside this declaration.
00087     virtual void LeftAutomatonGenerateLanguage( FILE * fp, void (*DumpSequenceOfLabels)(FILE *, const VoidSequence *, UINT), UINT encoding ) const;
00088     virtual void AddTarjanTable();
          // Disabled inline variant of GenerateLanguage that took the dump callback explicitly.
00089     // virtual void GenerateLanguage( const S8* fn, void (*DumpSequenceOfLabels)(FILE *, const VoidSequence *, UINT), UINT encoding = ENCODING_PLAIN ) const { FILE* fpOut = Fopen(fn, "wb"); CompressedAutomatonGenerateLanguage( C_CompressedAutomaton, fpOut, DumpSequenceOfLabels, encoding );}
00090 
00091 public:
00092     /* * * * * * * * * * * *  Post-Closed Methods   * * * * * * * * * * * */
          // Four Delta overloads: two take an explicit PairState plus a symbol
          // (as void* or char), two take only a symbol pointer or an S8* pattern.
          // All are declared only.
00093     virtual PairState Delta( const PairState * state, const void * symbol ) const;
00094     virtual PairState Delta( const PairState * state, const char symbol ) const;
00095     virtual PairState Delta( const void * symbol ) const;
00096     virtual PairState Delta( const S8* pattern ) const;
00097 
          // Unlike the other dump/write members this one is not const.
          // The callback parameter is named DumpLabel, hiding the extern "C"
          // function of the same name inside this declaration.
00098     virtual void DumpGV( FILE * fp, void (*DumpLabel)(const CompressedAutomaton *, FILE *, UINT, UINT), UINT encoding );
00099 
00100     virtual UINT number_of_states() const;                  
00101     virtual UINT number_of_transitions() const;             
00102     virtual UINT number_of_transitions(State s) const;      
00103 
          // Looks up element `pos` of automaton()->data through mVoidSequenceElement,
          // treats the result as an S8 pointer and returns a std::string holding
          // exactly one char read from that address.
          // NOTE(review): `pos` is not compared against data_length() in this
          // function; whether mVoidSequenceElement range-checks is not visible here.
          // NOTE(review): exactly one char is copied whatever C_symbolsize is -
          // confirm this is intended for symbol sizes other than 1.
00104     virtual inline std::string data_at(UINT pos) const {
00105         char * ptr = (S8*)mVoidSequenceElement( automaton()->data, pos );
00106         return std::string(
00107             ptr, 1
00108         );
00109     }
00110 
          // Returns the seqStored field of automaton()->data.
00111     virtual inline UINT data_length() const { return automaton()->data->seqStored; }
          // The access specifier below is commented out, so automaton() stays public.
00112 // protected:
          // Pure virtual accessor for the automaton; data_at() and data_length()
          // go through it instead of reading C_CompressedAutomaton directly.
00113     virtual CompressedAutomaton* automaton() const = 0;
00114     // virtual UINT suffixLink(const UINT&) const = 0;
00115 
00116 private:
          // Stream helpers, private and virtual; declared only.
00118     virtual void Print( std::ostream& os) const;
00119     virtual void ReadAll( std::istream& in);
00120     virtual void ReadOne( std::istream& in);
00121 };
00122 
00123 }}} /* End of namespace lmu::cis::sis */
00124 
00125 #endif /* end of include guard: COMPRESSEDAUTOMATONADAPTER_HPP */