SIS
Symmetric Index Structures
|
00001 #include "../base.h" 00002 00003 typedef struct tCntStarts{ 00004 VoidSequence * cnt; 00005 UINTSequence * starts; 00006 void * zero; 00007 } CntStarts; 00008 00009 static SINT Cmp( void * ptr, UINT s1, UINT s2 ){ 00010 CntStarts * cntStarts; 00011 void * str1; 00012 void * str2; 00013 UINT symbolSize; 00014 00015 cntStarts = (CntStarts *)(ptr); 00016 symbolSize = cntStarts->cnt->elementSize; 00017 str1 = mVoidSequenceElement( cntStarts->cnt, cntStarts->starts->seq[s1] ); 00018 str2 = mVoidSequenceElement( cntStarts->cnt, cntStarts->starts->seq[s2] ); 00019 while( CmpSymbols( str1, str2, symbolSize ) == 0 && SymbolToUINT( str1, symbolSize ) != 0 ){ 00020 str1 = str1 + symbolSize; 00021 str2 = str2 + symbolSize; 00022 } 00023 return CmpSymbols( str1, str2, symbolSize ); 00024 } 00025 00026 static void Swap( void * ptr, UINT s1, UINT s2 ){ 00027 CntStarts * cntStarts; 00028 UINT tmp; 00029 00030 cntStarts = (CntStarts *)(ptr); 00031 mSwapVariables( cntStarts->starts->seq[s1], cntStarts->starts->seq[s2], tmp ); 00032 } 00033 00034 static void Print( CntStarts * cntStarts, UINT s, FILE * fp, UINT encoding ){ 00035 UINT symbolSize; 00036 void * str; 00037 00038 symbolSize = cntStarts->cnt->elementSize; 00039 str = mVoidSequenceElement( cntStarts->cnt, cntStarts->starts->seq[s] ); 00040 while( SymbolToUINT( str, symbolSize ) != 0 ){ 00041 PrintSymbol( str, fp, symbolSize, encoding ); 00042 str = str + symbolSize; 00043 } 00044 PrintLine( fp, symbolSize, encoding ); 00045 } 00046 00047 void CmdGenerateSuffixes( const S8 * fileNameInput, const S8 * fileNameOutput, UINT bitsPerSymbol, UINT encoding ){ 00048 FILE * fpInput; 00049 FILE * fpOutput; 00050 VoidSequence * line = NULL; 00051 VoidSequence * cnt; 00052 UINTSequence * starts; 00053 CntStarts cntStarts; 00054 UINT i, nLine, symbolSize; 00055 mSymbolAndVariables( zero ) 00056 00057 mValidateBitsPerSymbol( bitsPerSymbol ) 00058 symbolSize = bitsPerSymbol/8; 00059 mSymbolAssignValue( zero, 0, symbolSize ) 00060 cnt = VoidSequenceInit( symbolSize ); 00061 fpInput = Fopen( fileNameInput, "rb" ); 00062 nLine = 0; 00063 while( bTRUE ){ 00064 if( line != NULL ){ 00065 VoidSequenceFree( line ); 00066 } 00067 line = VoidSequenceReadLine( fpInput, symbolSize, encoding ); 00068 if( line == NULL ){ 00069 break; 00070 } 00071 nLine++; 00072 for( i = 0; i < line->seqStored; i++ ){ 00073 if( SymbolToUINT( mVoidSequenceElement(line, i), symbolSize ) == 0 ){ 00074 S8 msg[MAX_INPUT_STRING_SIZE]; 00075 00076 sprintf( msg, "Cannot process line %llu - 0 is reserved as a new symbol.", (U64)(nLine) ); 00077 Throw( msg ); 00078 } 00079 } 00080 VoidSequenceAdd( line, zero ); 00081 VoidSequenceAppend( cnt, line ); 00082 } 00083 Fclose( fpInput ); 00084 starts = UINTSequenceInit2( cnt->seqStored, 1 ); 00085 for( i = 0; i < cnt->seqStored; i++ ){ 00086 UINTSequenceAdd( starts, i ); 00087 } 00088 cntStarts.cnt = cnt; 00089 cntStarts.starts = starts; 00090 cntStarts.zero = zero; 00091 Sort( &cntStarts, starts->seqStored, Cmp, Swap ); 00092 fpOutput = Fopen( fileNameOutput, "wb" ); 00093 if( starts->seqStored > 0 ){ 00094 Print( &cntStarts, 0, fpOutput, encoding ); 00095 for( i = 1; i < starts->seqStored; i++ ){ 00096 if( Cmp( &cntStarts, i-1, i ) != 0 ){ 00097 Print( &cntStarts, i, fpOutput, encoding ); 00098 } 00099 } 00100 } 00101 Fclose( fpOutput ); 00102 VoidSequenceFree( cnt ); 00103 UINTSequenceFree( starts ); 00104 }