SIS
Symmetric Index Structures
/Users/dbr/ma/src/bas/lml/ukkonenSuffixTree.c
Go to the documentation of this file.
00001 #include "base.h"
00002 
00003 static UINT GetIndexOfTransition( void * ptr, const void * object ){
00004     SuffixTreeBuildHelp * help;
00005     TransitionFromStartTo * tr;
00006     UINT index;
00007 
00008     help = (SuffixTreeBuildHelp *)(ptr);
00009     index = mCompressedAutomatonGetTransitionsStored( help->aut );
00010     tr = (TransitionFromStartTo *)(object);
00011     UINTSequenceAdd( help->transitionsFrom, tr->from );
00012     CompressedAutomatonAddTransition( help->aut, tr->from, tr->start, tr->to );
00013     return index;
00014 }
00015 
00016 static UINT GetFirstIndex( void * ptr ){
00017     SuffixTreeBuildHelp * help;
00018 
00019     help = (SuffixTreeBuildHelp *)(ptr);
00020     if( mCompressedAutomatonGetTransitionsStored( help->aut ) == 0 ){
00021         return NO;
00022     }
00023     return 0;
00024 }
00025 
00026 static UINT GetNextIndex( void * ptr, UINT index ){
00027     SuffixTreeBuildHelp * help;
00028 
00029     help = (SuffixTreeBuildHelp *)(ptr);
00030     index++;
00031     if( index < mCompressedAutomatonGetTransitionsStored( help->aut ) ){
00032         return index;
00033     }
00034     return NO;
00035 }
00036 
00037 static boolean Equals( void * ptr, const void * object, UINT index ){
00038     SuffixTreeBuildHelp * help;
00039     TransitionFromStartTo * tr;
00040 
00041     help = (SuffixTreeBuildHelp *)(ptr);
00042     tr = (TransitionFromStartTo *)(object);
00043     return( help->transitionsFrom->seq[index] == tr->from &&
00044             CmpSymbols( mVoidSequenceElement(help->aut->data, help->aut->transitionsStart->seq[index]), tr->symbol, help->aut->data->elementSize ) == 0 );
00045 }
00046 
00047 static UINT HashCode( void * ptr, const void * object, UINT hashSize ){
00048     SuffixTreeBuildHelp * help;
00049     TransitionFromStartTo * tr;
00050 
00051     help = (SuffixTreeBuildHelp *)(ptr);
00052     tr = (TransitionFromStartTo *)(object);
00053     return CodeBytes( CodeUINT( 0, tr->from, hashSize ), tr->symbol, help->aut->data->elementSize, hashSize );
00054 }
00055 
00056 static UINT HashCode2( void * ptr, UINT index, UINT hashSize ){
00057     SuffixTreeBuildHelp * help;
00058 
00059     help = (SuffixTreeBuildHelp *)(ptr);
00060     return CodeBytes( CodeUINT( 0, help->transitionsFrom->seq[index], hashSize ),
00061                       mVoidSequenceElement( help->aut->data, help->aut->transitionsStart->seq[index] ),
00062                       help->aut->data->elementSize,
00063                       hashSize );
00064 }
00065 
00066 SuffixTreeBuildHelp * SuffixTreeBuildHelpInit( CompressedAutomaton * aut ){
00067     SuffixTreeBuildHelp * help;
00068 
00069     help = Malloc( 1, sizeof(SuffixTreeBuildHelp) );
00070     help->aut = aut;
00071     help->statesSuffixLink = UINTSequenceInit2( aut->statesTransitions->seqAlloced, 1024 );
00072     help->transitionsFrom = UINTSequenceInit2( aut->transitionsStart->seqAlloced, 1024 );
00073     help->hash = HashInit( help, GetIndexOfTransition, GetFirstIndex, GetNextIndex, Equals, HashCode, HashCode2 );
00074     help->symbol = Malloc( 1, aut->data->elementSize );
00075     return help;
00076 }
00077 
00078 void SuffixTreeBuildHelpFree( SuffixTreeBuildHelp * help ){
00079     UINTSequenceFree( help->statesSuffixLink );
00080     UINTSequenceFree( help->transitionsFrom );
00081     HashFree( help->hash );
00082     Free( help->symbol );
00083     Free( help );
00084 }
00085 
00086 static PairState Delta( SuffixTreeBuildHelp * help, const PairState * state, const void * symbol ){
00087     PairState nextState;
00088     CompressedAutomaton * a;
00089 
00090     a = help->aut;
00091     nextState.transition = NO;
00092     nextState.index = NO;
00093     if( state->transition != NO ){
00094         if( CmpSymbols( mVoidSequenceElement(a->data, state->index), symbol, a->data->elementSize ) != 0 ){
00095             return nextState;
00096         }
00097         nextState.index = state->index + 1;
00098         nextState.transition = state->transition;
00099         if( nextState.index > a->statesEnd->seq[a->transitionsTo->seq[state->transition]] ){
00100             nextState.transition = NO;
00101             nextState.index = a->transitionsTo->seq[state->transition];
00102         }
00103     }
00104     else{
00105         TransitionFromStartTo tfst;
00106         UINT tr;
00107 
00108         tfst.from = state->index;
00109         tfst.symbol = symbol;
00110         tr = HashGetIndex( help->hash, &tfst );
00111         if( tr == NO ){
00112             return nextState;
00113         }
00114         nextState.index = a->transitionsStart->seq[tr] + 1;
00115         nextState.transition = tr;
00116         if( nextState.index > a->statesEnd->seq[a->transitionsTo->seq[tr]] ){
00117             nextState.transition = NO;
00118             nextState.index = a->transitionsTo->seq[tr];
00119         }
00120     }
00121     return nextState;
00122 }
00123 
00124 static UINT NewState( SuffixTreeBuildHelp * help ){
00125     UINT newState;
00126 
00127     newState = mCompressedAutomatonGetStatesStored( help->aut );
00128     CompressedAutomatonAddState( help->aut, newState );
00129     UINTSequenceAdd( help->statesSuffixLink, NO );
00130     return newState;
00131 }
00132 
00133 static boolean TestAndSplit( SuffixTreeBuildHelp * help, const PairState * state, UINT i, UINT * r ){
00134     PairState nextState;
00135     UINT newState, to;
00136     TransitionFromStartTo tfst;
00137 
00138     nextState = Delta( help, state, mVoidSequenceElement(help->aut->data, i) );
00139     if( nextState.index != NO ){
00140         return bTRUE;
00141     }
00142     if( state->transition == NO ){
00143         (*r) = state->index;
00144         return bFALSE;
00145     }
00146     newState = NewState( help );
00147     help->aut->statesEnd->seq[newState] = state->index - 1;
00148     to = help->aut->transitionsTo->seq[state->transition];
00149     help->aut->transitionsTo->seq[state->transition] = newState;
00150     tfst.from = newState;
00151     tfst.start = state->index;
00152     tfst.to = to;
00153     memcpy( help->symbol, mVoidSequenceElement(help->aut->data, state->index), help->aut->data->elementSize );
00154     tfst.symbol = help->symbol;
00155     HashAdd( help->hash, &tfst );
00156     (*r) = newState;
00157     return bFALSE;
00158 }
00159 
00160 static PairState SuffixLink( SuffixTreeBuildHelp * help, const PairState * state ){
00161     PairState fState;
00162     TransitionFromStartTo tfst;
00163     UINT s, start, end, tr, startTr, endTr;
00164 
00165     if( state->transition == NO ){
00166         fState.transition = NO;
00167         fState.index = help->statesSuffixLink->seq[state->index];
00168         return fState;
00169     }
00170     s = help->transitionsFrom->seq[state->transition];
00171     start = help->aut->transitionsStart->seq[state->transition];
00172     end = state->index - 1;
00173     if( s == help->aut->initialState ){
00174         start++;
00175         if( start > end ){
00176             return CompressedAutomatonGetInitialPairState( help->aut );
00177         }
00178     }
00179     else{
00180         s = help->statesSuffixLink->seq[s];
00181     }
00182     while( bTRUE ){
00183         tfst.from = s;
00184         tfst.symbol = mVoidSequenceElement( help->aut->data, start );
00185         tr = HashGetIndex( help->hash, &tfst );
00186         startTr = help->aut->transitionsStart->seq[tr];
00187         endTr = help->aut->statesEnd->seq[help->aut->transitionsTo->seq[tr]];
00188         if( end - start < endTr - startTr ){
00189             fState.index = startTr + end - start + 1;
00190             fState.transition = tr;
00191             break;
00192         }
00193         if( end - start == endTr - startTr ){
00194             fState.index = help->aut->transitionsTo->seq[tr];
00195             fState.transition = NO;
00196             break;
00197         }
00198         s = help->aut->transitionsTo->seq[tr];
00199         start += (endTr - startTr + 1);
00200     }
00201     return fState;
00202 }
00203 
00204 static PairState Update( SuffixTreeBuildHelp * help, const PairState * state, UINT i ){
00205     PairState state_with_i;
00206     UINT r, newState, prevr;
00207     TransitionFromStartTo tfst;
00208 
00209     state_with_i = (*state);
00210     prevr = help->aut->initialState;
00211     while( !TestAndSplit( help, &state_with_i, i, &r ) ){
00212         newState = NewState( help );
00213         help->aut->statesEnd->seq[newState] = help->aut->data->seqStored - 1;
00214         tfst.from = r;
00215         tfst.start = i;
00216         tfst.to = newState;
00217         memcpy( help->symbol, mVoidSequenceElement(help->aut->data, i), help->aut->data->elementSize );
00218         tfst.symbol = help->symbol;
00219         HashAdd( help->hash, &tfst );
00220         if( prevr != help->aut->initialState ){
00221             help->statesSuffixLink->seq[prevr] = r;
00222         }
00223         if( state_with_i.transition == NO && state_with_i.index == help->aut->initialState ){
00224             return state_with_i;
00225         }
00226         prevr = r;
00227         state_with_i = SuffixLink( help, &state_with_i );
00228     }
00229     if( prevr != help->aut->initialState ){
00230         help->statesSuffixLink->seq[prevr] = state_with_i.index;
00231     }
00232     return Delta( help, &state_with_i, mVoidSequenceElement(help->aut->data, i) );
00233 }
00234 
00235 void SuffixTreeAdd( SuffixTreeBuildHelp * help, const VoidSequence * documentDollar ){
00236     PairState state, nextState;
00237     UINT i;
00238 
00239     i = help->aut->data->seqStored;
00240     VoidSequenceAppend( help->aut->data, documentDollar );
00241     if( help->aut->initialState == NO ){
00242         UINT newState;
00243 
00244         newState = NewState( help );
00245         help->aut->initialState = newState;
00246         state = CompressedAutomatonGetInitialPairState( help->aut );
00247     }
00248     else{
00249         state = CompressedAutomatonGetInitialPairState( help->aut );
00250         for( ; i < help->aut->data->seqStored; i++ ){
00251             nextState = Delta( help, &state, mVoidSequenceElement(help->aut->data, i) );
00252             if( nextState.index == NO ){
00253                 break;
00254             }
00255             state = nextState;
00256         }
00257     }
00258     for( ; i < help->aut->data->seqStored; i++ ){
00259         state = Update( help, &state, i );
00260     }
00261 }