SIS
Symmetric Index Structures
|
00001 #include "base.h" 00002 00003 static UINT GetIndexOfTransition( void * ptr, const void * object ){ 00004 SuffixTreeBuildHelp * help; 00005 TransitionFromStartTo * tr; 00006 UINT index; 00007 00008 help = (SuffixTreeBuildHelp *)(ptr); 00009 index = mCompressedAutomatonGetTransitionsStored( help->aut ); 00010 tr = (TransitionFromStartTo *)(object); 00011 UINTSequenceAdd( help->transitionsFrom, tr->from ); 00012 CompressedAutomatonAddTransition( help->aut, tr->from, tr->start, tr->to ); 00013 return index; 00014 } 00015 00016 static UINT GetFirstIndex( void * ptr ){ 00017 SuffixTreeBuildHelp * help; 00018 00019 help = (SuffixTreeBuildHelp *)(ptr); 00020 if( mCompressedAutomatonGetTransitionsStored( help->aut ) == 0 ){ 00021 return NO; 00022 } 00023 return 0; 00024 } 00025 00026 static UINT GetNextIndex( void * ptr, UINT index ){ 00027 SuffixTreeBuildHelp * help; 00028 00029 help = (SuffixTreeBuildHelp *)(ptr); 00030 index++; 00031 if( index < mCompressedAutomatonGetTransitionsStored( help->aut ) ){ 00032 return index; 00033 } 00034 return NO; 00035 } 00036 00037 static boolean Equals( void * ptr, const void * object, UINT index ){ 00038 SuffixTreeBuildHelp * help; 00039 TransitionFromStartTo * tr; 00040 00041 help = (SuffixTreeBuildHelp *)(ptr); 00042 tr = (TransitionFromStartTo *)(object); 00043 return( help->transitionsFrom->seq[index] == tr->from && 00044 CmpSymbols( mVoidSequenceElement(help->aut->data, help->aut->transitionsStart->seq[index]), tr->symbol, help->aut->data->elementSize ) == 0 ); 00045 } 00046 00047 static UINT HashCode( void * ptr, const void * object, UINT hashSize ){ 00048 SuffixTreeBuildHelp * help; 00049 TransitionFromStartTo * tr; 00050 00051 help = (SuffixTreeBuildHelp *)(ptr); 00052 tr = (TransitionFromStartTo *)(object); 00053 return CodeBytes( CodeUINT( 0, tr->from, hashSize ), tr->symbol, help->aut->data->elementSize, hashSize ); 00054 } 00055 00056 static UINT HashCode2( void * ptr, UINT index, UINT hashSize ){ 00057 SuffixTreeBuildHelp * help; 00058 00059 help = (SuffixTreeBuildHelp *)(ptr); 00060 return CodeBytes( CodeUINT( 0, help->transitionsFrom->seq[index], hashSize ), 00061 mVoidSequenceElement( help->aut->data, help->aut->transitionsStart->seq[index] ), 00062 help->aut->data->elementSize, 00063 hashSize ); 00064 } 00065 00066 SuffixTreeBuildHelp * SuffixTreeBuildHelpInit( CompressedAutomaton * aut ){ 00067 SuffixTreeBuildHelp * help; 00068 00069 help = Malloc( 1, sizeof(SuffixTreeBuildHelp) ); 00070 help->aut = aut; 00071 help->statesSuffixLink = UINTSequenceInit2( aut->statesTransitions->seqAlloced, 1024 ); 00072 help->transitionsFrom = UINTSequenceInit2( aut->transitionsStart->seqAlloced, 1024 ); 00073 help->hash = HashInit( help, GetIndexOfTransition, GetFirstIndex, GetNextIndex, Equals, HashCode, HashCode2 ); 00074 help->symbol = Malloc( 1, aut->data->elementSize ); 00075 return help; 00076 } 00077 00078 void SuffixTreeBuildHelpFree( SuffixTreeBuildHelp * help ){ 00079 UINTSequenceFree( help->statesSuffixLink ); 00080 UINTSequenceFree( help->transitionsFrom ); 00081 HashFree( help->hash ); 00082 Free( help->symbol ); 00083 Free( help ); 00084 } 00085 00086 static PairState Delta( SuffixTreeBuildHelp * help, const PairState * state, const void * symbol ){ 00087 PairState nextState; 00088 CompressedAutomaton * a; 00089 00090 a = help->aut; 00091 nextState.transition = NO; 00092 nextState.index = NO; 00093 if( state->transition != NO ){ 00094 if( CmpSymbols( mVoidSequenceElement(a->data, state->index), symbol, a->data->elementSize ) != 0 ){ 00095 return nextState; 00096 } 00097 nextState.index = state->index + 1; 00098 nextState.transition = state->transition; 00099 if( nextState.index > a->statesEnd->seq[a->transitionsTo->seq[state->transition]] ){ 00100 nextState.transition = NO; 00101 nextState.index = a->transitionsTo->seq[state->transition]; 00102 } 00103 } 00104 else{ 00105 TransitionFromStartTo tfst; 00106 UINT tr; 00107 00108 tfst.from = state->index; 00109 tfst.symbol = symbol; 00110 tr = HashGetIndex( help->hash, &tfst ); 00111 if( tr == NO ){ 00112 return nextState; 00113 } 00114 nextState.index = a->transitionsStart->seq[tr] + 1; 00115 nextState.transition = tr; 00116 if( nextState.index > a->statesEnd->seq[a->transitionsTo->seq[tr]] ){ 00117 nextState.transition = NO; 00118 nextState.index = a->transitionsTo->seq[tr]; 00119 } 00120 } 00121 return nextState; 00122 } 00123 00124 static UINT NewState( SuffixTreeBuildHelp * help ){ 00125 UINT newState; 00126 00127 newState = mCompressedAutomatonGetStatesStored( help->aut ); 00128 CompressedAutomatonAddState( help->aut, newState ); 00129 UINTSequenceAdd( help->statesSuffixLink, NO ); 00130 return newState; 00131 } 00132 00133 static boolean TestAndSplit( SuffixTreeBuildHelp * help, const PairState * state, UINT i, UINT * r ){ 00134 PairState nextState; 00135 UINT newState, to; 00136 TransitionFromStartTo tfst; 00137 00138 nextState = Delta( help, state, mVoidSequenceElement(help->aut->data, i) ); 00139 if( nextState.index != NO ){ 00140 return bTRUE; 00141 } 00142 if( state->transition == NO ){ 00143 (*r) = state->index; 00144 return bFALSE; 00145 } 00146 newState = NewState( help ); 00147 help->aut->statesEnd->seq[newState] = state->index - 1; 00148 to = help->aut->transitionsTo->seq[state->transition]; 00149 help->aut->transitionsTo->seq[state->transition] = newState; 00150 tfst.from = newState; 00151 tfst.start = state->index; 00152 tfst.to = to; 00153 memcpy( help->symbol, mVoidSequenceElement(help->aut->data, state->index), help->aut->data->elementSize ); 00154 tfst.symbol = help->symbol; 00155 HashAdd( help->hash, &tfst ); 00156 (*r) = newState; 00157 return bFALSE; 00158 } 00159 00160 static PairState SuffixLink( SuffixTreeBuildHelp * help, const PairState * state ){ 00161 PairState fState; 00162 TransitionFromStartTo tfst; 00163 UINT s, start, end, tr, startTr, endTr; 00164 00165 if( state->transition == NO ){ 00166 fState.transition = NO; 00167 fState.index = help->statesSuffixLink->seq[state->index]; 00168 return fState; 00169 } 00170 s = help->transitionsFrom->seq[state->transition]; 00171 start = help->aut->transitionsStart->seq[state->transition]; 00172 end = state->index - 1; 00173 if( s == help->aut->initialState ){ 00174 start++; 00175 if( start > end ){ 00176 return CompressedAutomatonGetInitialPairState( help->aut ); 00177 } 00178 } 00179 else{ 00180 s = help->statesSuffixLink->seq[s]; 00181 } 00182 while( bTRUE ){ 00183 tfst.from = s; 00184 tfst.symbol = mVoidSequenceElement( help->aut->data, start ); 00185 tr = HashGetIndex( help->hash, &tfst ); 00186 startTr = help->aut->transitionsStart->seq[tr]; 00187 endTr = help->aut->statesEnd->seq[help->aut->transitionsTo->seq[tr]]; 00188 if( end - start < endTr - startTr ){ 00189 fState.index = startTr + end - start + 1; 00190 fState.transition = tr; 00191 break; 00192 } 00193 if( end - start == endTr - startTr ){ 00194 fState.index = help->aut->transitionsTo->seq[tr]; 00195 fState.transition = NO; 00196 break; 00197 } 00198 s = help->aut->transitionsTo->seq[tr]; 00199 start += (endTr - startTr + 1); 00200 } 00201 return fState; 00202 } 00203 00204 static PairState Update( SuffixTreeBuildHelp * help, const PairState * state, UINT i ){ 00205 PairState state_with_i; 00206 UINT r, newState, prevr; 00207 TransitionFromStartTo tfst; 00208 00209 state_with_i = (*state); 00210 prevr = help->aut->initialState; 00211 while( !TestAndSplit( help, &state_with_i, i, &r ) ){ 00212 newState = NewState( help ); 00213 help->aut->statesEnd->seq[newState] = help->aut->data->seqStored - 1; 00214 tfst.from = r; 00215 tfst.start = i; 00216 tfst.to = newState; 00217 memcpy( help->symbol, mVoidSequenceElement(help->aut->data, i), help->aut->data->elementSize ); 00218 tfst.symbol = help->symbol; 00219 HashAdd( help->hash, &tfst ); 00220 if( prevr != help->aut->initialState ){ 00221 help->statesSuffixLink->seq[prevr] = r; 00222 } 00223 if( state_with_i.transition == NO && state_with_i.index == help->aut->initialState ){ 00224 return state_with_i; 00225 } 00226 prevr = r; 00227 state_with_i = SuffixLink( help, &state_with_i ); 00228 } 00229 if( prevr != help->aut->initialState ){ 00230 help->statesSuffixLink->seq[prevr] = state_with_i.index; 00231 } 00232 return Delta( help, &state_with_i, mVoidSequenceElement(help->aut->data, i) ); 00233 } 00234 00235 void SuffixTreeAdd( SuffixTreeBuildHelp * help, const VoidSequence * documentDollar ){ 00236 PairState state, nextState; 00237 UINT i; 00238 00239 i = help->aut->data->seqStored; 00240 VoidSequenceAppend( help->aut->data, documentDollar ); 00241 if( help->aut->initialState == NO ){ 00242 UINT newState; 00243 00244 newState = NewState( help ); 00245 help->aut->initialState = newState; 00246 state = CompressedAutomatonGetInitialPairState( help->aut ); 00247 } 00248 else{ 00249 state = CompressedAutomatonGetInitialPairState( help->aut ); 00250 for( ; i < help->aut->data->seqStored; i++ ){ 00251 nextState = Delta( help, &state, mVoidSequenceElement(help->aut->data, i) ); 00252 if( nextState.index == NO ){ 00253 break; 00254 } 00255 state = nextState; 00256 } 00257 } 00258 for( ; i < help->aut->data->seqStored; i++ ){ 00259 state = Update( help, &state, i ); 00260 } 00261 }