// SIS
// Symmetric Index Structures
00001 #ifndef DOCUMENTINDEXINGAUTOMATON_HPP 00002 #define DOCUMENTINDEXINGAUTOMATON_HPP 00003 00004 #include "../cppbase.hpp" 00005 00006 #include <iostream> 00007 #include <string> 00008 #include <map> 00009 #include <vector> 00010 #include <cassert> 00011 00012 #include "../adapter/ScdawgAdapter.hpp" 00013 #include "../adapter/InenagaCDAWGAdapter.hpp" 00014 #include "../adapter/VoidSequenceAdapter.hpp" 00015 00016 // #include "AutomatonTypedefs.hpp" 00017 // #include "adapter/ManagedStageAutomatonInterface.hpp" 00018 00019 #include "DocumentIndexingAutomatonInterface.hpp" 00020 #include "DocumentIndexingAutomatonFindResults.hpp" 00021 00022 /* Get into namespace */ 00023 using lmu::cis::sis::InenagaCDAWGAdapter; 00024 using lmu::cis::sis::SCDAWGAdapter; 00025 using lmu::cis::sis::DocumentIndexingAutomatonFindResults; 00026 00027 00028 namespace lmu { namespace cis { namespace sis { 00029 00030 00041 template<typename AutomatonType> 00042 class DocumentIndexingAutomaton 00043 : public virtual AutomatonType 00044 , public virtual DocumentIndexingAutomatonInterface 00045 // , public virtual CompressedAutomatonAdapterInterface // TODO Split into CA-interface and CA-typedefs (or better yet, put into separate header!) 
00046 , public virtual ManagedStageAutomatonInterface 00047 { 00048 00049 // friend class DocumentIndexingFindResults; 00050 00051 00057 // Dispatching table 00058 using AutomatonType::Add; 00059 using AutomatonType::GetInitialPairState; 00060 using AutomatonType::Delta; 00061 using AutomatonType::Close; 00062 // using AutomatonType::suffixLink; 00064 00065 protected: 00083 00084 std::map<DocumentName, State> _document_sinkstate; 00085 std::map<DocumentName, PositionsTuple> _document_length; 00086 std::vector<DocumentName> _document_names; 00087 std::map<State, std::vector<DocumentName>> _states_documents; 00088 00089 00090 public: 00091 00092 // using AutomatonType::AutomatonType; /* unfortunately constructor delegation is not supported by gcc just yet */ 00093 00094 00098 DocumentIndexingAutomaton() : DocumentIndexingAutomaton(1) { 00099 if (DEBUG_LEVEL >= 4) std::cerr << "DocumentIndexingAutomaton::DocumentIndexingAutomaton()" << std::endl; 00100 } 00101 00102 00112 DocumentIndexingAutomaton(UINT symbolsize) 00113 : CompressedAutomatonAdapter(symbolsize) //HACK? 
00114 , AutomatonType(symbolsize) 00115 { 00116 if (DEBUG_LEVEL >= 4) std::cerr << "DocumentIndexingAutomaton::DocumentIndexingAutomaton(" << symbolsize << ")" << std::endl; 00117 } 00118 00119 00121 virtual ~DocumentIndexingAutomaton() { 00122 if (DEBUG_LEVEL >= 4) std::cerr << "virtual DocumentIndexingAutomaton::~DocumentIndexingAutomaton()" << std::endl; 00123 } 00124 00125 00126 00136 virtual void AddDocuments(const std::vector<DocumentName>& documents); 00137 00138 00139 00157 virtual void Index(); 00158 00159 00160 00168 virtual void Close(); 00169 00170 00171 00183 virtual DocumentIndexingAutomatonFindResults&& findall( const std::string& w ); 00184 00185 00186 // virtual std::string right_continuation( const UINT& state ) const; 00187 // virtual std::string lrcont(const UINT pos, const UINT width = 5) const; 00188 00189 00190 00191 protected: 00192 00210 virtual DocumentIndexingAutomatonFindResults&& find( const PairState state, UINT length ); 00211 00212 00217 virtual DocumentIndexingAutomatonFindResults&& findRec(const UINT s, const UINT length); 00218 00219 00231 virtual UINT suffixLink(const UINT& state) const; 00232 00233 00234 00240 // virtual std::string data_at(UINT pos) const; 00241 00242 }; 00243 00244 }}} /* End of namespace lmu::cis::sis */ 00245 00246 #include "DocumentIndexingAutomaton.cpp" 00247 00248 #endif /* end of include guard: DOCUMENTINDEXINGAUTOMATON_HPP */