SIS
Symmetric Index Structures
/Users/dbr/ma/src/bas/lml/compressedAutomaton.c
Go to the documentation of this file.
00001 #include "base.h"
00002 
00003 CompressedAutomaton * CompressedAutomatonInit( UINT symbolSize ){
00004     CompressedAutomaton * aut;
00005 
00006     aut = Malloc( 1, sizeof(CompressedAutomaton) );
00007     aut->initialState = NO;
00008     aut->statesTransitions = UINTSequenceInit2( 1024, 1024 );
00009     aut->statesNumberOfTransitions = UINTSequenceInit2( 1024, 1024 );
00010     aut->statesEnd = UINTSequenceInit2( 1024, 1024 );
00011     aut->transitionsStart = UINTSequenceInit2( 1024, 1024 );
00012     aut->transitionsTo = UINTSequenceInit2( 1024, 1024 );
00013     aut->data = VoidSequenceInit2( symbolSize, 1024, 1024 );
00014     aut->tt = NULL;
00015     return aut;
00016 }
00017 
00018 void CompressedAutomatonAddState( CompressedAutomaton * aut, UINT state ){
00019     UINT statesStored;
00020 
00021     statesStored = mCompressedAutomatonGetStatesStored( aut );
00022     if( statesStored <= state ){
00023         UINT i;
00024 
00025         for( i = statesStored; i <= state; i++ ){
00026             UINTSequenceAdd( aut->statesTransitions, NO );
00027             UINTSequenceAdd( aut->statesNumberOfTransitions, 0 );
00028             UINTSequenceAdd( aut->statesEnd, NO );
00029         }
00030     }
00031 }
00032 
00033 void CompressedAutomatonAddTransition( CompressedAutomaton * aut, UINT stateFrom, UINT start, UINT stateTo ){
00034     UINT transitionsStored;
00035 
00036     transitionsStored = mCompressedAutomatonGetTransitionsStored( aut );
00037     CompressedAutomatonAddState( aut, mMAX( stateFrom, stateTo ) );
00038     UINTSequenceAdd( aut->transitionsStart, start );
00039     UINTSequenceAdd( aut->transitionsTo, stateTo );
00040     aut->statesNumberOfTransitions->seq[stateFrom]++;
00041     if( aut->statesTransitions->seq[stateFrom] == NO ){
00042         aut->statesTransitions->seq[stateFrom] = transitionsStored;
00043     }
00044 }
00045 
00046 void CompressedAutomatonFree( CompressedAutomaton * aut ){
00047     UINTSequenceFree( aut->statesTransitions );
00048     UINTSequenceFree( aut->statesNumberOfTransitions );
00049     UINTSequenceFree( aut->statesEnd );
00050     UINTSequenceFree( aut->transitionsStart );
00051     UINTSequenceFree( aut->transitionsTo );
00052     if( aut->data != NULL ){
00053         VoidSequenceFree( aut->data );
00054     }
00055     if( aut->tt != NULL ){
00056         if( aut->tt->alph != NULL ){
00057             AlphabetFree( aut->tt->alph );
00058         }
00059         TarjanTableFree( aut->tt );
00060     }
00061     Free( aut );
00062 }
00063 
00064 void CompressedAutomatonWrite( const CompressedAutomaton * aut, FILE * fp ){
00065     U8 * element;
00066     U8 tt;
00067 
00068     if( endiannessOfThisMachine != endiannessOnWrite ){
00069         element = (U8 *)(&(aut->initialState)); mReverseBytes( element, sizeof(UINT) )
00070     }
00071     mWriteEndianness( fp, endiannessOnWrite )
00072     Fwrite( &(aut->initialState), sizeof(UINT), 1, fp );
00073     if( endiannessOfThisMachine != endiannessOnWrite ){
00074         element = (U8 *)(&(aut->initialState)); mReverseBytes( element, sizeof(UINT) )
00075     }
00076     UINTSequenceWrite( aut->statesTransitions, fp );
00077     UINTSequenceWrite( aut->statesNumberOfTransitions, fp );
00078     UINTSequenceWrite( aut->statesEnd, fp );
00079     UINTSequenceWrite( aut->transitionsStart, fp );
00080     UINTSequenceWrite( aut->transitionsTo, fp );
00081     if( aut->data == NULL ){
00082         tt = 0;
00083         Fwrite( &tt, sizeof(U8), 1, fp );
00084     }
00085     else{
00086         UINT sizes[1];
00087 
00088         sizes[0] = aut->data->elementSize;
00089         tt = 1;
00090         Fwrite( &tt, sizeof(U8), 1, fp );
00091         VoidSequenceWrite( aut->data, fp, sizes, 1 );
00092     }
00093     if( aut->tt != NULL ){
00094         tt = 1;
00095         Fwrite( &tt, sizeof(U8), 1, fp );
00096         TarjanTableWrite( aut->tt, fp );
00097     }
00098     else{
00099         tt = 0;
00100         Fwrite( &tt, sizeof(U8), 1, fp );
00101     }
00102 }
00103 
00104 CompressedAutomaton * CompressedAutomatonRead( FILE * fp ){
00105     CompressedAutomaton * aut;
00106     U8 * element;
00107     UINT endianness;
00108     U8 tt;
00109 
00110     aut = (CompressedAutomaton *)Malloc( 1, sizeof(CompressedAutomaton) );
00111     mReadEndianness( fp, endianness )
00112     Fread( &(aut->initialState), sizeof(UINT), 1, fp );
00113     if( endiannessOfThisMachine != endianness ){
00114         element = (U8 *)(&(aut->initialState)); mReverseBytes( element, sizeof(UINT) )
00115     }
00116     aut->statesTransitions = UINTSequenceRead( fp );
00117     aut->statesNumberOfTransitions = UINTSequenceRead( fp );
00118     aut->statesEnd = UINTSequenceRead( fp );
00119     aut->transitionsStart = UINTSequenceRead( fp );
00120     aut->transitionsTo = UINTSequenceRead( fp );
00121     Fread( &tt, sizeof(U8), 1, fp );
00122     if( tt == 1 ){
00123         aut->data = VoidSequenceRead( fp );
00124     }
00125     else{
00126         aut->data = NULL;
00127     }
00128     Fread( &tt, sizeof(U8), 1, fp );
00129     if( tt == 1 ){
00130         aut->tt = TarjanTableRead( fp );
00131     }
00132     else{
00133         aut->tt = NULL;
00134     }
00135     return aut;
00136 }
00137 
00138 void CompressedAutomatonDumpStat( const CompressedAutomaton * aut, FILE * fp ){
00139     double size;
00140 
00141     if( aut->data != NULL ){
00142         fprintf( fp, "number of data symbols: %llu\n", (U64)(aut->data->seqStored) );
00143         fprintf( fp, "symbol size: %llu\n", (U64)(aut->data->elementSize) );
00144         size = aut->data->seqStored;
00145         size *= aut->data->elementSize;
00146         size /= 1024.0;
00147         size /= 1024.0;
00148         fprintf( fp, "data size: %.2f MB\n", size );
00149     }
00150     fprintf( fp, "number of states: %llu\n", (U64)mCompressedAutomatonGetStatesStored(aut) );
00151     fprintf( fp, "number of transitions: %llu\n", (U64)mCompressedAutomatonGetTransitionsStored(aut) );
00152 }
00153 
00154 void CompressedAutomatonShrink( CompressedAutomaton * aut ){
00155     UINTSequenceShrink( aut->statesTransitions );
00156     UINTSequenceShrink( aut->statesNumberOfTransitions );
00157     UINTSequenceShrink( aut->statesEnd );
00158     UINTSequenceShrink( aut->transitionsStart );
00159     UINTSequenceShrink( aut->transitionsTo );
00160     if( aut->data != NULL ){
00161         VoidSequenceShrink( aut->data );
00162     }
00163 }
00164 
00165 void GenerateLanguage( const CompressedAutomaton * aut, UINT state, VoidSequence * line, FILE * fp, void (*DumpSequenceOfLabels)(FILE *, const VoidSequence *, UINT), UINT encoding ){
00166     UINT i, j, tr, end, seqStored;
00167 
00168     if( aut->statesNumberOfTransitions->seq[state] == 0 ){
00169         DumpSequenceOfLabels( fp, line, encoding );
00170     }
00171     tr = aut->statesTransitions->seq[state];
00172     seqStored = line->seqStored;
00173     for( i = 0; i < aut->statesNumberOfTransitions->seq[state]; i++ ){
00174         end = aut->statesEnd->seq[aut->transitionsTo->seq[tr+i]];
00175         for( j = aut->transitionsStart->seq[tr+i]; j <= end; j++ ){
00176             VoidSequenceAdd( line, mVoidSequenceElement(aut->data, j) );
00177         }
00178         GenerateLanguage( aut, aut->transitionsTo->seq[tr+i], line, fp, DumpSequenceOfLabels, encoding );
00179         line->seqStored = seqStored;
00180     }
00181 }
00182 
00183 void CompressedAutomatonGenerateLanguage( const CompressedAutomaton * aut, FILE * fp, void (*DumpSequenceOfLabels)(FILE *, const VoidSequence *, UINT), UINT encoding ){
00184     VoidSequence * line;
00185 
00186     if( aut->initialState == NO ){
00187         return;
00188     }
00189     line = VoidSequenceInit( aut->data->elementSize );
00190     GenerateLanguage( aut, aut->initialState, line, fp, DumpSequenceOfLabels, encoding );
00191     VoidSequenceFree( line );
00192 }
00193 
00194 static void GenerateLeftLanguage( const CompressedAutomaton * aut,
00195                                   UINT state,
00196                                   VoidSequence * line,
00197                                   FILE * fp,
00198                                   void (*DumpSequenceOfLabels)(FILE *, const VoidSequence *, UINT),
00199                                   UINT encoding ){
00200     UINT i, j, tr, end, seqStored;
00201 
00202     if( aut->statesNumberOfTransitions->seq[state] == 0 ){
00203         DumpSequenceOfLabels( fp, line, encoding );
00204     }
00205     tr = aut->statesTransitions->seq[state];
00206     seqStored = line->seqStored;
00207     for( i = 0; i < aut->statesNumberOfTransitions->seq[state]; i++ ){
00208         end = aut->statesEnd->seq[aut->transitionsTo->seq[tr+i]];
00209         for( j = aut->transitionsStart->seq[tr+i]+1; j > end; j-- ){
00210             VoidSequenceAdd( line, mVoidSequenceElement(aut->data, j-1) );
00211         }
00212         GenerateLeftLanguage( aut, aut->transitionsTo->seq[tr+i], line, fp, DumpSequenceOfLabels, encoding );
00213         line->seqStored = seqStored;
00214     }
00215 }
00216 
00217 void CompressedLeftAutomatonGenerateLanguage( const CompressedAutomaton * aut, FILE * fp,
00218 void (*DumpSequenceOfLabels)(FILE *, const VoidSequence *, UINT), UINT encoding ){
00219     VoidSequence * line;
00220 
00221     if( aut->initialState == NO ){
00222         return;
00223     }
00224     line = VoidSequenceInit( aut->data->elementSize );
00225     GenerateLeftLanguage( aut, aut->initialState, line, fp, DumpSequenceOfLabels, encoding );
00226     VoidSequenceFree( line );
00227 }
00228 
00229 typedef struct tCompressedAutomatonTransitionsFrom{
00230     CompressedAutomaton * aut;
00231     UINTSequence * transitionsFrom;
00232 } CompressedAutomatonTransitionsFrom;
00233 
00234 static SINT CmpTransitions( void * ptr, UINT t1, UINT t2 ){
00235     CompressedAutomatonTransitionsFrom * catf;
00236     SINT labelCmp;
00237 
00238     catf = (CompressedAutomatonTransitionsFrom *)(ptr);
00239     if( catf->transitionsFrom->seq[t1] < catf->transitionsFrom->seq[t2] ){
00240         return -1;
00241     }
00242     if( catf->transitionsFrom->seq[t1] > catf->transitionsFrom->seq[t2] ){
00243         return 1;
00244     }
00245     labelCmp = CmpSymbols( mVoidSequenceElement(catf->aut->data, catf->aut->transitionsStart->seq[t1]),
00246                            mVoidSequenceElement(catf->aut->data, catf->aut->transitionsStart->seq[t2]),
00247                            catf->aut->data->elementSize );
00248     if( labelCmp != 0 ){
00249         return labelCmp;
00250     }
00251     if( catf->aut->transitionsTo->seq[t1] < catf->aut->transitionsTo->seq[t2] ){
00252         return -1;
00253     }
00254     if( catf->aut->transitionsTo->seq[t1] > catf->aut->transitionsTo->seq[t2] ){
00255         return 1;
00256     }
00257     return 0;
00258 }
00259 
00260 static void SwapTransitions( void * ptr, UINT t1, UINT t2 ){
00261     CompressedAutomatonTransitionsFrom * catf;
00262     UINT tmp;
00263 
00264     catf = (CompressedAutomatonTransitionsFrom *)(ptr);
00265     mSwapVariables( catf->transitionsFrom->seq[t1], catf->transitionsFrom->seq[t2], tmp );
00266     mSwapVariables( catf->aut->transitionsStart->seq[t1], catf->aut->transitionsStart->seq[t2], tmp );
00267     mSwapVariables( catf->aut->transitionsTo->seq[t1], catf->aut->transitionsTo->seq[t2], tmp );
00268 }
00269 
00270 void CompressedAutomatonSortTransitions( CompressedAutomaton * aut, UINTSequence * transitionsFrom ){
00271     CompressedAutomatonTransitionsFrom catf;
00272     UINT i, j, q, transitionsStored;
00273 
00274     for( i = 0; i < mCompressedAutomatonGetStatesStored(aut); i++ ){
00275         aut->statesTransitions->seq[i] = NO;
00276         aut->statesNumberOfTransitions->seq[i] = 0;
00277     }
00278     transitionsStored = mCompressedAutomatonGetTransitionsStored( aut );
00279     if( transitionsStored == 0 ){
00280         return;
00281     }
00282     catf.aut = aut;
00283     catf.transitionsFrom = transitionsFrom;
00284     Sort( &catf, transitionsStored, CmpTransitions, SwapTransitions );
00285     j = 1;
00286     q = transitionsFrom->seq[0];
00287     aut->statesTransitions->seq[q] = 0;
00288     aut->statesNumberOfTransitions->seq[q] = 1;
00289     for( i = 1; i < transitionsStored; i++ ){
00290         if( CmpTransitions( &catf, j-1, i ) != 0 ){
00291             if( j != i ){
00292                 transitionsFrom->seq[j] = transitionsFrom->seq[i];
00293                 aut->transitionsStart->seq[j] = aut->transitionsStart->seq[i];
00294                 aut->transitionsTo->seq[j] = aut->transitionsTo->seq[i];
00295             }
00296             if( transitionsFrom->seq[j] == q ){
00297                 aut->statesNumberOfTransitions->seq[q]++;
00298             }
00299             else{
00300                 q = transitionsFrom->seq[j];
00301                 aut->statesTransitions->seq[q] = j;
00302                 aut->statesNumberOfTransitions->seq[q] = 1;
00303             }
00304             j++;
00305         }
00306     }
00307     if( j < transitionsStored ){
00308         transitionsFrom->seqStored = j;
00309         aut->transitionsStart->seqStored = j;
00310         aut->transitionsTo->seqStored = j;
00311         UINTSequenceShrink( transitionsFrom );
00312         UINTSequenceShrink( aut->transitionsStart );
00313         UINTSequenceShrink( aut->transitionsTo );
00314     }
00315 }
00316 
00317 static SINT Cmp( const void * set, UINT tr, const void * object ){
00318     const CompressedAutomaton * a;
00319 
00320     a = (const CompressedAutomaton *)(set);
00321     return CmpSymbols( mVoidSequenceElement(a->data, a->transitionsStart->seq[tr]), object, a->data->elementSize );
00322 }
00323 
00324 PairState CompressedAutomatonDelta( const CompressedAutomaton * a, const PairState * state, const void * symbol ){
00325     PairState nextState;
00326 
00327     nextState.transition = NO;
00328     nextState.index = NO;
00329     if( state->transition != NO ){
00330         if( CmpSymbols( mVoidSequenceElement(a->data, state->index), symbol, a->data->elementSize ) != 0 ){
00331             return nextState;
00332         }
00333         nextState.index = state->index + 1;
00334         nextState.transition = state->transition;
00335         if( nextState.index > a->statesEnd->seq[a->transitionsTo->seq[state->transition]] ){
00336             nextState.transition = NO;
00337             nextState.index = a->transitionsTo->seq[state->transition];
00338         }
00339     }
00340     else{
00341         UINT tr;
00342 
00343         if( a->tt != NULL ){
00344             UINT code;
00345 
00346             code = SymbolToUINT( symbol, a->data->elementSize );
00347             code = mAlphabetGetCode( a->tt->alph, code );
00348             if( code == NO_CODE ){
00349                 return nextState;
00350             }
00351             tr = a->tt->table->seq[a->tt->statesTransitions->seq[state->index] + code];
00352             if( tr == NO || a->statesNumberOfTransitions->seq[state->index] == 0 ||
00353                 tr < a->statesTransitions->seq[state->index] || a->statesTransitions->seq[state->index] + a->statesNumberOfTransitions->seq[state->index] <= tr ){
00354                 return nextState;
00355             }
00356         }
00357         else{
00358             tr = BinarySearch( a, a->statesTransitions->seq[state->index], a->statesNumberOfTransitions->seq[state->index], symbol, Cmp );
00359             if( tr == NO ){
00360                 return nextState;
00361             }
00362         }
00363         nextState.index = a->transitionsStart->seq[tr] + 1;
00364         nextState.transition = tr;
00365         if( nextState.index > a->statesEnd->seq[a->transitionsTo->seq[tr]] ){
00366             nextState.transition = NO;
00367             nextState.index = a->transitionsTo->seq[tr];
00368         }
00369     }
00370     return nextState;
00371 }
00372 
00373 void CompressedAutomatonAddTarjanTable( CompressedAutomaton * aut ){
00374     Alphabet * alph;
00375     Automaton a;
00376     UINT i;
00377 
00378     a.type = AUTOMATON_TYPE_SYMBOL;
00379     a.statesTransitions = aut->statesTransitions;
00380     a.statesNumberOfTransitions = aut->statesNumberOfTransitions;
00381     a.transitionsTo = aut->transitionsTo;
00382     a.transitionsLabel = VoidSequenceInit2( aut->data->elementSize, mCompressedAutomatonGetTransitionsStored(aut), 1 );
00383     for( i = 0; i < mCompressedAutomatonGetTransitionsStored(aut); i++ ){
00384         VoidSequenceAdd( a.transitionsLabel, mVoidSequenceElement(aut->data, aut->transitionsStart->seq[i]) );
00385     }
00386     alph = AlphabetInit();
00387     AlphabetAdd( alph, &a );
00388     aut->tt = TarjanTableInit( alph, &a );
00389     VoidSequenceFree( a.transitionsLabel );
00390 }
00391 
00392 PairState CompressedAutomatonGetInitialPairState( const CompressedAutomaton * aut ){
00393     PairState state;
00394 
00395     state.transition = NO;
00396     state.index = aut->initialState;
00397     return state;
00398 }
00399 
00400 void CompressedAutomatonDumpGV( CompressedAutomaton * aut, FILE * fp, void (*DumpLabel)(const CompressedAutomaton *, FILE *, UINT, UINT), UINT encoding ){
00401     S8 tmp[MAX_INPUT_STRING_SIZE];
00402     UINT i, s, tr, statesStored, symbolSize;
00403 
00404     symbolSize = aut->data->elementSize;
00405     PrintString( fp, "Digraph A{ \n", symbolSize, encoding );
00406     PrintString( fp, "rankdir=LR;\n", symbolSize, encoding );
00407     statesStored = mCompressedAutomatonGetStatesStored( aut );
00408     PrintString( fp, "node[shape = point, color = white]; \"\";\n", symbolSize, encoding );
00409     PrintString( fp, "node [shape = circle, color = black];\n", symbolSize, encoding );
00410     sprintf( tmp, "\"\" -> %llu;\n", (U64)(aut->initialState) );
00411     PrintString( fp, tmp, symbolSize, encoding );
00412     for( s = 0; s < statesStored; s++ ){
00413         for( i = 0; i < aut->statesNumberOfTransitions->seq[s]; i++ ){
00414             tr = aut->statesTransitions->seq[s] + i;
00415             sprintf( tmp, "%llu -> %llu [ label = \"", (U64)(s), (U64)(aut->transitionsTo->seq[tr]) );
00416             PrintString( fp, tmp, symbolSize, encoding );
00417             DumpLabel( aut, fp, tr, encoding );
00418             PrintString( fp, "\" ];\n", symbolSize, encoding );
00419         }
00420     }
00421     PrintString( fp, "}\n", symbolSize, encoding );
00422 }