SIS
Symmetric Index Structures
|
00001 #include "../base.h" 00002 00003 #define mLinesGetLinesStored( object ) ( (object)->offsets->seqStored ) 00004 #define mLinesGet( object, n ) ( (object)->lines->seq + ((object)->offsets->seq[(n)]*(object)->lines->elementSize) ) 00005 #define mLinesGetLength( object, n ) ( (object)->lengths->seq[(n)] ) 00006 #define mLinesGetSymbol( object, n, pos ) ( mLinesGet( object, n ) + ((pos)*(object)->lines->elementSize) ) 00007 00008 typedef struct tLines{ 00009 VoidSequence * lines; 00010 UINTSequence * offsets; 00011 UINTSequence * lengths; 00012 } Lines; 00013 00014 static Lines * LinesInit( UINT symbolSize ){ 00015 Lines * lines; 00016 00017 lines = Malloc( 1, sizeof(Lines) ); 00018 lines->lines = VoidSequenceInit( symbolSize ); 00019 lines->offsets = UINTSequenceInit(); 00020 lines->lengths = UINTSequenceInit(); 00021 return lines; 00022 } 00023 00024 static void LinesFree( Lines * lines ){ 00025 VoidSequenceFree( lines->lines ); 00026 UINTSequenceFree( lines->offsets ); 00027 UINTSequenceFree( lines->lengths ); 00028 Free( lines ); 00029 } 00030 00031 static void LinesAdd( Lines * lines, const VoidSequence * line ){ 00032 UINTSequenceAdd( lines->offsets, lines->lines->seqStored ); 00033 UINTSequenceAdd( lines->lengths, line->seqStored ); 00034 VoidSequenceAppend( lines->lines, line ); 00035 } 00036 00037 static void LinesReverseAll( Lines * lines ){ 00038 UINT i, n, symbolSize; 00039 00040 symbolSize = lines->lines->elementSize; 00041 for( n = 0; n < mLinesGetLinesStored(lines); n++ ){ 00042 for( i = 0; i < mLinesGetLength(lines, n)/2; i++ ){ 00043 SwapSymbols( mLinesGetSymbol(lines, n, i), mLinesGetSymbol(lines, n, mLinesGetLength(lines, n) - 1 - i), symbolSize ); 00044 } 00045 } 00046 } 00047 00048 static SINT Cmp( void * ptr, UINT l1, UINT l2 ){ 00049 UINT symbolSize, m, i; 00050 Lines * lines; 00051 SINT r; 00052 00053 lines = (Lines *)(ptr); 00054 symbolSize = lines->lines->elementSize; 00055 m = mMIN( mLinesGetLength(lines, l1), mLinesGetLength(lines, l2) ); 00056 for( i = 0; i < m; i++ ){ 00057 r = CmpSymbols( mLinesGetSymbol(lines, l1, i), mLinesGetSymbol(lines, l2, i), symbolSize ); 00058 if( r != 0 ){ 00059 return r; 00060 } 00061 } 00062 if( mLinesGetLength(lines, l1) == mLinesGetLength(lines, l2) ){ 00063 return 0; 00064 } 00065 if( mLinesGetLength(lines, l1) < mLinesGetLength(lines, l2) ){ 00066 return -1; 00067 } 00068 return 1; 00069 } 00070 00071 static void Swap( void * ptr, UINT l1, UINT l2 ){ 00072 Lines * lines; 00073 UINT tmp; 00074 00075 lines = (Lines *)(ptr); 00076 mSwapVariables( lines->offsets->seq[l1], lines->offsets->seq[l2], tmp ); 00077 mSwapVariables( lines->lengths->seq[l1], lines->lengths->seq[l2], tmp ); 00078 } 00079 00080 static void LinesSort( Lines * lines ){ 00081 Sort( lines, mLinesGetLinesStored(lines), Cmp, Swap ); 00082 } 00083 00084 static void Print( Lines * lines, UINT n, FILE * fp, UINT encoding ){ 00085 UINT i; 00086 UINT symbolSize; 00087 00088 symbolSize = lines->lines->elementSize; 00089 for( i = 0; i < mLinesGetLength(lines, n); i++ ){ 00090 PrintSymbol( mLinesGetSymbol(lines, n, i), fp, symbolSize, encoding ); 00091 } 00092 PrintLine( fp, symbolSize, encoding ); 00093 } 00094 00095 void CmdReverseAndSort( const S8 * fileNameInput, const S8 * fileNameOutput, UINT bitsPerSymbol, UINT encoding ){ 00096 FILE * fpInput; 00097 FILE * fpOutput; 00098 Lines * lines; 00099 VoidSequence * line = NULL; 00100 UINT i, symbolSize; 00101 00102 mValidateBitsPerSymbol( bitsPerSymbol ) 00103 symbolSize = bitsPerSymbol/8; 00104 lines = LinesInit( symbolSize ); 00105 fpInput = Fopen( fileNameInput, "rb" ); 00106 while( bTRUE ){ 00107 if( line != NULL ){ 00108 VoidSequenceFree( line ); 00109 } 00110 line = VoidSequenceReadLine( fpInput, symbolSize, encoding ); 00111 if( line == NULL ){ 00112 break; 00113 } 00114 LinesAdd( lines, line ); 00115 } 00116 Fclose( fpInput ); 00117 LinesReverseAll( lines ); 00118 LinesSort( lines ); 00119 fpOutput = Fopen( fileNameOutput, "wb" ); 00120 if( mLinesGetLinesStored(lines) > 0 ){ 00121 Print( lines, 0, fpOutput, encoding ); 00122 for( i = 1; i < mLinesGetLinesStored(lines); i++ ){ 00123 if( Cmp( lines, i, i-1 ) != 0 ){ 00124 Print( lines, i, fpOutput, encoding ); 00125 } 00126 } 00127 } 00128 Fclose( fpOutput ); 00129 LinesFree( lines ); 00130 }