SIS
Symmetric Index Structures
/Users/dbr/ma/src/bas/lml/inenagaCDAWG.c
Go to the documentation of this file.
00001 #include "base.h"
00002 
00003 static UINT GetIndexOfTransition( void * ptr, const void * object ){
00004     CDAWGBuildHelp * help;
00005     TransitionFromStartTo * tr;
00006     UINT index;
00007 
00008     help = (CDAWGBuildHelp *)(ptr);
00009     index = mCompressedAutomatonGetTransitionsStored( help->aut );
00010     tr = (TransitionFromStartTo *)(object);
00011     UINTSequenceAdd( help->transitionsFrom, tr->from );
00012     CompressedAutomatonAddTransition( help->aut, tr->from, tr->start, tr->to );
00013     if( help->aut->statesTransitions->seq[tr->from] == index ){
00014         UINTSequenceAdd( help->transitionsNext, NO );
00015     }
00016     else{
00017         UINTSequenceAdd( help->transitionsNext, help->aut->statesTransitions->seq[tr->from] );
00018         help->aut->statesTransitions->seq[tr->from] = index;
00019     }
00020     return index;
00021 }
00022 
00023 static UINT GetFirstIndex( void * ptr ){
00024     CDAWGBuildHelp * help;
00025 
00026     help = (CDAWGBuildHelp *)(ptr);
00027     if( mCompressedAutomatonGetTransitionsStored( help->aut ) == 0 ){
00028         return NO;
00029     }
00030     return 0;
00031 }
00032 
00033 static UINT GetNextIndex( void * ptr, UINT index ){
00034     CDAWGBuildHelp * help;
00035 
00036     help = (CDAWGBuildHelp *)(ptr);
00037     index++;
00038     if( index < mCompressedAutomatonGetTransitionsStored( help->aut ) ){
00039         return index;
00040     }
00041     return NO;
00042 }
00043 
00044 static boolean Equals( void * ptr, const void * object, UINT index ){
00045     CDAWGBuildHelp * help;
00046     TransitionFromStartTo * tr;
00047 
00048     help = (CDAWGBuildHelp *)(ptr);
00049     tr = (TransitionFromStartTo *)(object);
00050     return( help->transitionsFrom->seq[index] == tr->from &&
00051             CmpSymbols( mVoidSequenceElement(help->aut->data, help->aut->transitionsStart->seq[index]), tr->symbol, help->aut->data->elementSize ) == 0 );
00052 }
00053 
00054 static UINT HashCode( void * ptr, const void * object, UINT hashSize ){
00055     CDAWGBuildHelp * help;
00056     TransitionFromStartTo * tr;
00057 
00058     help = (CDAWGBuildHelp *)(ptr);
00059     tr = (TransitionFromStartTo *)(object);
00060     return CodeBytes( CodeUINT( 0, tr->from, hashSize ), tr->symbol, help->aut->data->elementSize, hashSize );
00061 }
00062 
00063 static UINT HashCode2( void * ptr, UINT index, UINT hashSize ){
00064     CDAWGBuildHelp * help;
00065 
00066     help = (CDAWGBuildHelp *)(ptr);
00067     return CodeBytes( CodeUINT( 0, help->transitionsFrom->seq[index], hashSize ),
00068                       mVoidSequenceElement( help->aut->data, help->aut->transitionsStart->seq[index] ),
00069                       help->aut->data->elementSize,
00070                       hashSize );
00071 }
00072 
00073 CDAWGBuildHelp * CDAWGBuildHelpInit( CompressedAutomaton * aut ){
00074     CDAWGBuildHelp * help;
00075 
00076     help = Malloc( 1, sizeof(CDAWGBuildHelp) );
00077     help->aut = aut;
00078     help->statesSuffixLink = UINTSequenceInit2( aut->statesTransitions->seqAlloced, 1024 );
00079     help->statesLength = UINTSequenceInit2( aut->statesTransitions->seqAlloced, 1024 );
00080     help->transitionsFrom = UINTSequenceInit2( aut->transitionsStart->seqAlloced, 1024 );
00081     help->transitionsNext = UINTSequenceInit2( aut->transitionsStart->seqAlloced, 1024 );
00082     help->hash = HashInit( help, GetIndexOfTransition, GetFirstIndex, GetNextIndex, Equals, HashCode, HashCode2 );
00083     help->symbol = Malloc( 1, aut->data->elementSize );
00084     return help;
00085 }
00086 
00087 void CDAWGBuildHelpFree( CDAWGBuildHelp * help ){
00088     UINTSequenceFree( help->statesSuffixLink );
00089     UINTSequenceFree( help->statesLength );
00090     UINTSequenceFree( help->transitionsFrom );
00091     UINTSequenceFree( help->transitionsNext );
00092     HashFree( help->hash );
00093     Free( help->symbol );
00094     Free( help );
00095 }
00096 
00097 PairState Delta( CDAWGBuildHelp * help, const PairState * state, const void * symbol, UINT * tr ){
00098     PairState nextState;
00099     CompressedAutomaton * a;
00100 
00101     a = help->aut;
00102     nextState.transition = NO;
00103     nextState.index = NO;
00104     if( state->transition != NO ){
00105         (*tr) = state->transition;
00106         if( CmpSymbols( mVoidSequenceElement(a->data, state->index), symbol, a->data->elementSize ) != 0 ){
00107             return nextState;
00108         }
00109         nextState.index = state->index + 1;
00110         nextState.transition = state->transition;
00111         if( nextState.index > a->statesEnd->seq[a->transitionsTo->seq[state->transition]] ){
00112             nextState.transition = NO;
00113             nextState.index = a->transitionsTo->seq[state->transition];
00114         }
00115     }
00116     else{
00117         TransitionFromStartTo tfst;
00118 
00119         tfst.from = state->index;
00120         tfst.symbol = symbol;
00121         (*tr) = HashGetIndex( help->hash, &tfst );
00122         if( (*tr) == NO ){
00123             return nextState;
00124         }
00125         nextState.index = a->transitionsStart->seq[(*tr)] + 1;
00126         nextState.transition = (*tr);
00127         if( nextState.index > a->statesEnd->seq[a->transitionsTo->seq[(*tr)]] ){
00128             nextState.transition = NO;
00129             nextState.index = a->transitionsTo->seq[(*tr)];
00130         }
00131     }
00132     return nextState;
00133 }
00134 
00135 static UINT NewState( CDAWGBuildHelp * help ){
00136     UINT newState;
00137 
00138     newState = mCompressedAutomatonGetStatesStored( help->aut );
00139     CompressedAutomatonAddState( help->aut, newState );
00140     UINTSequenceAdd( help->statesSuffixLink, NO );
00141     UINTSequenceAdd( help->statesLength, NO );
00142     return newState;
00143 }
00144 
00145 static UINT Split( CDAWGBuildHelp * help, const PairState * state ){
00146     UINT newState, from, to;
00147     TransitionFromStartTo tfst;
00148 
00149     newState = NewState( help );
00150     help->aut->statesEnd->seq[newState] = state->index - 1;
00151     from = help->transitionsFrom->seq[state->transition];
00152     to = help->aut->transitionsTo->seq[state->transition];
00153     help->statesLength->seq[newState] = help->statesLength->seq[from] + (state->index - help->aut->transitionsStart->seq[state->transition]);
00154     help->aut->transitionsTo->seq[state->transition] = newState;
00155     tfst.from = newState;
00156     tfst.start = state->index;
00157     tfst.to = to;
00158     memcpy( help->symbol, mVoidSequenceElement(help->aut->data, state->index), help->aut->data->elementSize );
00159     tfst.symbol = help->symbol;
00160     HashAdd( help->hash, &tfst );
00161     return newState;
00162 }
00163 
00164 static PairState SuffixLink( CDAWGBuildHelp * help, const PairState * state ){
00165     PairState fState;
00166     TransitionFromStartTo tfst;
00167     UINT s, start, end, tr, startTr, endTr;
00168 
00169     if( state->transition == NO ){
00170         fState.transition = NO;
00171         fState.index = help->statesSuffixLink->seq[state->index];
00172         return fState;
00173     }
00174     s = help->transitionsFrom->seq[state->transition];
00175     start = help->aut->transitionsStart->seq[state->transition];
00176     end = state->index - 1;
00177     if( s == help->aut->initialState ){
00178         start++;
00179         if( start > end ){
00180             return CompressedAutomatonGetInitialPairState( help->aut );
00181         }
00182     }
00183     else{
00184         s = help->statesSuffixLink->seq[s];
00185     }
00186     while( bTRUE ){
00187         tfst.from = s;
00188         tfst.symbol = mVoidSequenceElement( help->aut->data, start );
00189         tr = HashGetIndex( help->hash, &tfst );
00190         startTr = help->aut->transitionsStart->seq[tr];
00191         endTr = help->aut->statesEnd->seq[help->aut->transitionsTo->seq[tr]];
00192         if( end - start < endTr - startTr ){
00193             fState.index = startTr + end - start + 1;
00194             fState.transition = tr;
00195             break;
00196         }
00197         if( end - start == endTr - startTr ){
00198             fState.index = help->aut->transitionsTo->seq[tr];
00199             fState.transition = NO;
00200             break;
00201         }
00202         s = help->aut->transitionsTo->seq[tr];
00203         start += (endTr - startTr + 1);
00204     }
00205     return fState;
00206 }
00207 
00209 static PairState Separate( CDAWGBuildHelp * help, const PairState * state, UINT i ){
00210     TransitionFromStartTo tfst;
00211     PairState nextState, st, q;
00212     UINT length, newState, tr, t;
00213 
00214     nextState = Delta( help, state, mVoidSequenceElement(help->aut->data, i), &t );
00215     if( nextState.transition != NO ){
00216         return nextState;
00217     }
00218     if( state->transition != NO ){
00219         UINT from, to;
00220 
00221         from = help->transitionsFrom->seq[state->transition];
00222         to = help->aut->transitionsTo->seq[state->transition];
00223         length = help->statesLength->seq[from] + help->aut->statesEnd->seq[to] - help->aut->transitionsStart->seq[state->transition] + 1;
00224     }
00225     else{
00226         length = help->statesLength->seq[state->index] + 1;
00227     }
00228     if( help->statesLength->seq[nextState.index] == length ){
00229         return nextState;
00230     }
00231     newState = NewState( help );
00232     help->statesLength->seq[newState] = length;
00233     help->aut->statesEnd->seq[newState] = help->aut->statesEnd->seq[nextState.index];
00234     help->statesSuffixLink->seq[newState] = help->statesSuffixLink->seq[nextState.index];
00235     help->statesSuffixLink->seq[nextState.index] = newState;
00236     tfst.from = newState;
00237     for( tr = help->aut->statesTransitions->seq[nextState.index]; tr != NO; tr = help->transitionsNext->seq[tr] ){
00238         tfst.start = help->aut->transitionsStart->seq[tr];
00239         tfst.to = help->aut->transitionsTo->seq[tr];
00240         memcpy( help->symbol, mVoidSequenceElement(help->aut->data, tfst.start), help->aut->data->elementSize );
00241         tfst.symbol = help->symbol;
00242         HashAdd( help->hash, &tfst );
00243     }
00244     st = (*state);
00245     while( bTRUE ){
00246         help->aut->transitionsTo->seq[t] = newState;
00247         if( st.transition == NO && st.index == help->aut->initialState ){
00248             break;
00249         }
00250         st = SuffixLink( help, &st );
00251         q = Delta( help, &st, mVoidSequenceElement(help->aut->data, i), &t );
00252         if( q.transition != nextState.transition || q.index != nextState.index ){
00253             break;
00254         }
00255     }
00256     nextState.transition = NO;
00257     nextState.index = newState;
00258     return nextState;
00259 }
00260 
00261 
00262 static PairState Update( CDAWGBuildHelp * help, const PairState * state, UINT i, UINT * sink, UINT infinity, boolean * slink ){
00263 /*
00264     function update(s, (k, p)): returns pair of integers;
00265     // (s, (k, p − 1)) is the canonical reference pair for the active point.
00266 
00267     c := w[p]; 
00268     oldr := nil;
00269 
00270     while not check_end_point(s,(k,p−1),c) do
00271         if k ≤ p−1 then
00272             r := split_edge(s,(k,p−1)); 
00273         else 
00274             r := s; // explicit case.
00275 
00276     create node r′; 
00277     create edge (r, (p, ∞), r′);
00278     
00279     if oldr != nil then 
00280         suf(oldr) := r;
00281         
00282     oldr := r;
00283     
00284     (s, k) := canonize(suf (s), (k, p − 1)); 
00285     
00286     if oldr != nil then 
00287         suf(oldr) := s; 
00288         
00289     return canonize(s, (k, p));
00290 */
00291     PairState state_with_i;
00292     UINT r, prevr, succ, t;
00293     TransitionFromStartTo tfst;
00294 
00295     r = succ = NO;
00296     state_with_i = (*state);
00297     prevr = help->aut->initialState;
00298     while( bTRUE ){
00299         (*slink) = ( Delta( help, &state_with_i, mVoidSequenceElement(help->aut->data, i), &t ).index != NO );
00300         if( *slink ){
00301             break;
00302         }
00303         if( state_with_i.transition != NO ){
00304             if( succ == help->aut->transitionsTo->seq[state_with_i.transition] ){
00305                 help->aut->transitionsTo->seq[state_with_i.transition] = r;
00306                 state_with_i = SuffixLink( help, &state_with_i );
00307                 continue;
00308             }
00309             else{
00310                 succ = help->aut->transitionsTo->seq[state_with_i.transition];
00311                 r = Split( help, &state_with_i );
00312             }
00313         }
00314         else{
00315             r = state_with_i.index;
00316         }
00317         tfst.from = r;
00318         tfst.start = i;
00319         if( (*sink) == NO ){
00320             (*sink) = NewState( help );
00321             help->statesLength->seq[(*sink)] = infinity;
00322             help->aut->statesEnd->seq[(*sink)] = help->aut->data->seqStored - 1;
00323         }
00324         tfst.to = (*sink);
00325         memcpy( help->symbol, mVoidSequenceElement(help->aut->data, i), help->aut->data->elementSize );
00326         tfst.symbol = help->symbol;
00327         HashAdd( help->hash, &tfst );
00328         if( prevr != help->aut->initialState ){
00329             help->statesSuffixLink->seq[prevr] = r;
00330         }
00331         if( state_with_i.transition == NO && state_with_i.index == help->aut->initialState ){
00332             return state_with_i;
00333         }
00334         prevr = r;
00335         state_with_i = SuffixLink( help, &state_with_i );
00336     }
00337     if( prevr != help->aut->initialState ){
00338         help->statesSuffixLink->seq[prevr] = state_with_i.index;
00339     }
00340     return Separate( help, &state_with_i, i );
00341 }
00342 
00344 UINT CDAWGAdd( CDAWGBuildHelp * help, const VoidSequence * documentDollar ){
00345     PairState state;
00346     UINT i, sink;
00347     boolean slink;
00348     
00349     i = help->aut->data->seqStored; // WTF?
00350 
00351     // make sure there is a dollar at the end
00352     VoidSequenceAppend( help->aut->data, documentDollar );
00353 
00354 
00355     // create_nodes root and ⊥;
00356     if( help->aut->initialState == NO ){
00357         UINT newState;
00358 
00359         newState = NewState( help );
00360         help->statesLength->seq[newState] = 0;
00361         help->aut->initialState = newState;
00362     }
00363     state = CompressedAutomatonGetInitialPairState( help->aut );
00364 
00365     // Not doing this step?
00366     // for j := 1 to m do 
00367         //create_edge (⊥, (−j, −j), root); 
00368 
00369 
00370     sink = NO;
00371     for( ; i < help->aut->data->seqStored; i++ ){
00372         state = Update( help, &state, i, &sink, documentDollar->seqStored, &slink );
00373     }
00374     if( sink != NO ){
00375         if( slink ){
00376             help->statesSuffixLink->seq[sink] = state.index;
00377         }
00378         else{
00379             help->statesSuffixLink->seq[sink] = help->aut->initialState;
00380         }
00381     }
00382     return sink;
00383 }