SIS
Symmetric Index Structures
/Users/dbr/ma/src/bas/lml/cmd/cmdBuildSuffixTree.c
Go to the documentation of this file.
00001 #include "../base.h"
00002 
00003 void CmdBuildSuffixTree( const S8 * fileNameInput, const S8 * fileNameOutput, UINT bitsPerSymbol, UINT encoding, boolean tarjanTable ){
00004     FILE * fpInput;
00005     FILE * fpOutput;
00006     VoidSequence * line = NULL;
00007     CompressedAutomaton * aut;
00008     SuffixTreeBuildHelp * help;
00009     UINT i, nLine, symbolSize;
00010     mSymbolAndVariables( zero )
00011 
00012     mValidateBitsPerSymbol( bitsPerSymbol )
00013     symbolSize = bitsPerSymbol/8;
00014     mSymbolAssignValue( zero, 0, symbolSize )
00015     aut = CompressedAutomatonInit( symbolSize );
00016     help = SuffixTreeBuildHelpInit( aut );
00017     fpInput = Fopen( fileNameInput, "rb" );
00018     nLine = 0;
00019     while( bTRUE ){
00020         if( line != NULL ){
00021             VoidSequenceFree( line );
00022         }
00023         line = VoidSequenceReadLine( fpInput, symbolSize, encoding );
00024         if( line == NULL ){
00025             break;
00026         }
00027         nLine++;
00028         for( i = 0; i < line->seqStored; i++ ){
00029             if( SymbolToUINT( mVoidSequenceElement(line, i), symbolSize ) == 0 ){
00030                 S8 msg[MAX_INPUT_STRING_SIZE];
00031 
00032                 sprintf( msg, "Cannot process line %llu - 0 is reserved as a new symbol.", (U64)(nLine) );
00033                 Throw( msg );
00034             }
00035         }
00036         VoidSequenceAdd( line, zero );
00037         SuffixTreeAdd( help, line );
00038     }
00039     Fclose( fpInput );
00040     CompressedAutomatonSortTransitions( aut, help->transitionsFrom );
00041     CompressedAutomatonShrink( aut );
00042     SuffixTreeBuildHelpFree( help );
00043     if( tarjanTable ){
00044         CompressedAutomatonAddTarjanTable( aut );
00045     }
00046     fpOutput = Fopen( fileNameOutput, "wb" );
00047     CompressedAutomatonWrite( aut, fpOutput );
00048     Fclose( fpOutput );
00049     CompressedAutomatonFree( aut );
00050 }
00051 
00052 void CmdBuildCDAWG( const S8 * fileNameInput, const S8 * fileNameOutput, UINT bitsPerSymbol, UINT encoding, boolean tarjanTable ){
00053     FILE * fpInput;
00054     FILE * fpOutput;
00055     VoidSequence * line = NULL;
00056     CompressedAutomaton * aut;
00057     CDAWGBuildHelp * help;
00058     UINT i, nLine, symbolSize;
00059     mSymbolAndVariables( zero )
00060 
00061     mValidateBitsPerSymbol( bitsPerSymbol )
00062     symbolSize = bitsPerSymbol/8;
00063     mSymbolAssignValue( zero, 0, symbolSize )
00064     aut = CompressedAutomatonInit( symbolSize );
00065     help = CDAWGBuildHelpInit( aut );
00066     fpInput = Fopen( fileNameInput, "rb" );
00067     nLine = 0;
00068     while( bTRUE ){
00069         if( line != NULL ){
00070             VoidSequenceFree( line );
00071         }
00072         line = VoidSequenceReadLine( fpInput, symbolSize, encoding );
00073         if( line == NULL ){
00074             break;
00075         }
00076         nLine++;
00077         for( i = 0; i < line->seqStored; i++ ){
00078             if( SymbolToUINT( mVoidSequenceElement(line, i), symbolSize ) == 0 ){
00079                 S8 msg[MAX_INPUT_STRING_SIZE];
00080 
00081                 sprintf( msg, "Cannot process line %llu - 0 is reserved as a new symbol.", (U64)(nLine) );
00082                 Throw( msg );
00083             }
00084         }
00085         VoidSequenceAdd( line, zero );
00086         CDAWGAdd( help, line );
00087     }
00088     Fclose( fpInput );
00089     CompressedAutomatonSortTransitions( aut, help->transitionsFrom );
00090     CompressedAutomatonShrink( aut );
00091     CDAWGBuildHelpFree( help );
00092     if( tarjanTable ){
00093         CompressedAutomatonAddTarjanTable( aut );
00094     }
00095     fpOutput = Fopen( fileNameOutput, "wb" );
00096     CompressedAutomatonWrite( aut, fpOutput );
00097     Fclose( fpOutput );
00098     CompressedAutomatonFree( aut );
00099 }
00100 
00101 void CmdBuildSCDAWG( const S8 * fileNameInput, const S8 * fileNameOutput, UINT bitsPerSymbol, UINT encoding, boolean tarjanTable ){
00102     FILE * fpInput;
00103     FILE * fpOutput;
00104     VoidSequence * line = NULL;
00105     VoidSequence * sharpDocumentDollar = NULL;
00106     SCDAWG * aut;
00107     SCDAWGBuildHelp * help;
00108     UINT i, n, nLine, symbolSize;
00109     mSymbolAndVariables( zero )
00110     mSymbolAndVariables( one )
00111 
00112     mValidateBitsPerSymbol( bitsPerSymbol )
00113     symbolSize = bitsPerSymbol/8;
00114     mSymbolAssignValue( zero, 0, symbolSize )
00115     mSymbolAssignValue( one, 1, symbolSize )
00116     aut = SCDAWGInit( symbolSize );
00117     help = SCDAWGBuildHelpInit( aut );
00118     fpInput = Fopen( fileNameInput, "rb" );
00119     nLine = 0;
00120     while( bTRUE ){
00121         if( line != NULL ){
00122             VoidSequenceFree( line );
00123         }
00124         line = VoidSequenceReadLine( fpInput, symbolSize, encoding );
00125         if( line == NULL ){
00126             break;
00127         }
00128         nLine++;
00129         for( i = 0; i < line->seqStored; i++ ){
00130             n = SymbolToUINT( mVoidSequenceElement(line, i), symbolSize );
00131             if( n == 0 || n == 1 ){
00132                 S8 msg[MAX_INPUT_STRING_SIZE];
00133 
00134                 sprintf( msg, "Cannot process line %llu - 0 and 1 are reserved as new symbols. (pos is %d, sign is %d)", (U64)(nLine), i, n );
00135                 Throw( msg );
00136             }
00137         }
00138         sharpDocumentDollar = VoidSequenceInit2( symbolSize, line->seqStored + 2, 1 );
00139         VoidSequenceAdd( sharpDocumentDollar, one );
00140         VoidSequenceAppend( sharpDocumentDollar, line );
00141         VoidSequenceAdd( sharpDocumentDollar, zero );
00142         SCDAWGAdd( help, sharpDocumentDollar );
00143         VoidSequenceFree( sharpDocumentDollar );
00144     }
00145     Fclose( fpInput );
00146     SCDAWGClose( help );
00147     SCDAWGShrink( aut );
00148     SCDAWGBuildHelpFree( help );
00149     if( tarjanTable ){
00150         SCDAWGAddTarjanTable( aut );
00151     }
00152     fpOutput = Fopen( fileNameOutput, "wb" );
00153     SCDAWGWrite( aut, fpOutput );
00154     Fclose( fpOutput );
00155     SCDAWGFree( aut );
00156 }