SIS
Symmetric Index Structures
/Users/dbr/ma/src/lmu/cis/sis/indexer/DocumentIndexingAutomaton.hpp
Go to the documentation of this file.
00001 #ifndef DOCUMENTINDEXINGAUTOMATON_HPP
00002 #define DOCUMENTINDEXINGAUTOMATON_HPP
00003 
00004 #include "../cppbase.hpp"
00005 
00006 #include <iostream>
00007 #include <string>
00008 #include <map>
00009 #include <vector>
00010 #include <cassert>
00011 
00012 #include "../adapter/ScdawgAdapter.hpp"
00013 #include "../adapter/InenagaCDAWGAdapter.hpp"
00014 #include "../adapter/VoidSequenceAdapter.hpp"
00015 
00016 // #include "AutomatonTypedefs.hpp"
00017 // #include "adapter/ManagedStageAutomatonInterface.hpp"
00018 
00019 #include "DocumentIndexingAutomatonInterface.hpp"
00020 #include "DocumentIndexingAutomatonFindResults.hpp"
00021 
00022 /* Get into namespace */
00023 using lmu::cis::sis::InenagaCDAWGAdapter;
00024 using lmu::cis::sis::SCDAWGAdapter;
00025 using lmu::cis::sis::DocumentIndexingAutomatonFindResults;
00026 
00027 
00028 namespace lmu { namespace cis { namespace sis {
00029 
00030 
00041 template<typename AutomatonType>
00042 class DocumentIndexingAutomaton
00043     : public virtual AutomatonType
00044     , public virtual DocumentIndexingAutomatonInterface
00045     // , public virtual CompressedAutomatonAdapterInterface // TODO Split into CA-interface and CA-typedefs (or better yet, put into separate header!)
00046     , public virtual ManagedStageAutomatonInterface
00047 {
00048     // Derives virtually from AutomatonType, DocumentIndexingAutomatonInterface and ManagedStageAutomatonInterface.
00049     // friend class DocumentIndexingFindResults;
00050     // Member functions declared without a body are presumably defined in DocumentIndexingAutomaton.cpp (confirm).
00051     // The using-declarations below sit in the default (private) section of the class.
00057     // Dispatching table
00058     using AutomatonType::Add;
00059     using AutomatonType::GetInitialPairState;
00060     using AutomatonType::Delta;
00061     using AutomatonType::Close;
00062     // using AutomatonType::suffixLink;
00064 
00065 protected:
00083     // Per-document bookkeeping containers; how they are populated is not visible in this header.
00084     std::map<DocumentName, State>                   _document_sinkstate;    ///< DocumentName -> State (presumably the document's sink state; confirm)
00085     std::map<DocumentName, PositionsTuple>          _document_length;       ///< DocumentName -> PositionsTuple (presumably length/position info; confirm)
00086     std::vector<DocumentName>                       _document_names;        ///< Sequence of DocumentName values (ordering semantics not visible here)
00087     std::map<State, std::vector<DocumentName>>      _states_documents;      ///< State -> list of DocumentName (presumably documents tied to that state; confirm)
00088 
00089 
00090 public:
00091     // NOTE(review): the disabled line below is inheriting-constructors syntax, not constructor delegation.
00092     // using AutomatonType::AutomatonType; /* unfortunately constructor delegation is not supported by gcc just yet */
00093 
00094     /// Delegates to DocumentIndexingAutomaton(UINT) with symbolsize 1, then traces to std::cerr if DEBUG_LEVEL >= 4.
00098     DocumentIndexingAutomaton() : DocumentIndexingAutomaton(1) {
00099         if (DEBUG_LEVEL >= 4) std::cerr << "DocumentIndexingAutomaton::DocumentIndexingAutomaton()" << std::endl;
00100     }
00101     // NOTE(review): CompressedAutomatonAdapter is not a direct base listed above; presumably a virtual base of AutomatonType (confirm).
00102     /// Forwards symbolsize to CompressedAutomatonAdapter and AutomatonType; traces to std::cerr if DEBUG_LEVEL >= 4.
00112     DocumentIndexingAutomaton(UINT symbolsize)
00113         : CompressedAutomatonAdapter(symbolsize) //HACK?
00114         , AutomatonType(symbolsize)
00115     {
00116             if (DEBUG_LEVEL >= 4) std::cerr << "DocumentIndexingAutomaton::DocumentIndexingAutomaton(" << symbolsize << ")" << std::endl;
00117     }
00118 
00119     /// Virtual destructor; its body only emits a trace to std::cerr when DEBUG_LEVEL >= 4.
00121     virtual ~DocumentIndexingAutomaton() {
00122         if (DEBUG_LEVEL >= 4) std::cerr << "virtual DocumentIndexingAutomaton::~DocumentIndexingAutomaton()" << std::endl;
00123     }
00124 
00125 
00126     /// Presumably registers the given documents for later indexing (definition not in this header; confirm).
00136     virtual void AddDocuments(const std::vector<DocumentName>& documents);
00137 
00138 
00139     /// Presumably builds the index over the previously added documents (definition not in this header; confirm).
00157     virtual void Index();
00158 
00159 
00160     /// Declares Close() as a public virtual; the same name is also pulled in from AutomatonType by the using-declaration above.
00168     virtual void Close();
00169 
00170     /// Lookup for the string w (semantics are defined outside this header). NOTE(review): the return type is an
00171     /// rvalue reference; verify that the definition never binds it to a function-local object.
00183     virtual DocumentIndexingAutomatonFindResults&& findall( const std::string& w );
00184 
00185 
00186     // virtual std::string right_continuation( const UINT& state ) const;
00187     // virtual std::string lrcont(const UINT pos, const UINT width = 5) const;
00188 
00189 
00190 
00191 protected:
00192     /// Lookup starting from a PairState with a given length; same rvalue-reference return caveat as findall().
00210     virtual DocumentIndexingAutomatonFindResults&& find( const PairState state, UINT length );
00211 
00212     /// Variant taking a raw state s and a length; by name presumably recursive (confirm in the definition).
00217     virtual DocumentIndexingAutomatonFindResults&& findRec(const UINT s, const UINT length);
00218 
00219     /// Const query returning a UINT for the given state; by name presumably that state's suffix link (confirm).
00231     virtual UINT suffixLink(const UINT& state) const;
00232 
00233 
00234 
00240     // virtual std::string data_at(UINT pos) const;
00241 
00242 };
00243 
00244 }}} /* End of namespace lmu::cis::sis */
00245 
00246 #include "DocumentIndexingAutomaton.cpp"
00247 
00248 #endif /* end of include guard: DOCUMENTINDEXINGAUTOMATON_HPP */