SIS
Symmetric Index Structures
|
00001 #include "base.h" 00002 00003 VoidSequence * VoidSequenceInit( UINT elementSize ){ 00004 return VoidSequenceInit2( elementSize, 64, -1 ); 00005 } 00006 00007 VoidSequence * VoidSequenceInit2( UINT elementSize, UINT seqAlloced, SINT growth ){ 00008 VoidSequence * seq; 00009 00010 seq = (VoidSequence *)Malloc( 1, sizeof(VoidSequence) ); 00011 seq->elementSize = elementSize; 00012 seq->seqStored = 0; 00013 seq->seqAlloced = seqAlloced; 00014 seq->seq = Malloc( seqAlloced, elementSize ); 00015 seq->growth = growth; 00016 return seq; 00017 } 00018 00019 void VoidSequenceFree( VoidSequence * seq ){ 00020 Free( seq->seq ); 00021 Free( seq ); 00022 } 00023 00024 void VoidSequenceAdd( VoidSequence * seq, const void * elementToAdd ){ 00025 if( seq->seqStored == seq->seqAlloced ){ 00026 UINT mem; 00027 if( seq->growth < 0 ){ 00028 mem = seq->seqAlloced + seq->seqAlloced / ((UINT)(-seq->growth)); 00029 } 00030 else{ 00031 mem = seq->seqAlloced + seq->growth; 00032 } 00033 if( mem <= seq->seqAlloced ){ 00034 mem++; 00035 } 00036 seq->seq = Realloc( seq->seq, mem, seq->elementSize ); 00037 seq->seqAlloced = mem; 00038 } 00039 VoidSequenceSet( seq, seq->seqStored, elementToAdd ); 00040 seq->seqStored++; 00041 } 00042 00043 void VoidSequenceWrite( VoidSequence * seq, FILE * fp, UINT * sizes, UINT numberOfSizes ){ 00044 UINT i, j, offset; 00045 U8 * element; 00046 00047 if( endiannessOfThisMachine != endiannessOnWrite ){ 00048 offset = 0; 00049 for( j = 0; j < numberOfSizes; j++ ){ 00050 if( sizes[j] != 1 ){ 00051 for( i = 0; i < seq->seqStored; i++ ){ 00052 element = (U8 *)mVoidSequenceElement( seq, i ); 00053 element = element + offset; 00054 mReverseBytes( element, sizes[j] ) 00055 } 00056 } 00057 offset += sizes[j]; 00058 } 00059 for( i = 0; i < numberOfSizes; i++ ){ 00060 element = (U8 *)(sizes + i); mReverseBytes( element, sizeof(UINT) ) 00061 } 00062 element = (U8 *)(&numberOfSizes); mReverseBytes( element, sizeof(UINT) ) 00063 element = (U8 *)(&(seq->elementSize)); mReverseBytes( element, sizeof(UINT) ) 00064 element = (U8 *)(&(seq->seqStored)); mReverseBytes( element, sizeof(UINT) ) 00065 } 00066 mWriteEndianness( fp, endiannessOnWrite ) 00067 Fwrite( &numberOfSizes, sizeof(UINT), 1, fp ); 00068 Fwrite( &(seq->elementSize), sizeof(UINT), 1, fp ); 00069 Fwrite( &(seq->seqStored), sizeof(UINT), 1, fp ); 00070 if( endiannessOfThisMachine != endiannessOnWrite ){ 00071 element = (U8 *)(&numberOfSizes); mReverseBytes( element, sizeof(UINT) ) 00072 element = (U8 *)(&(seq->elementSize)); mReverseBytes( element, sizeof(UINT) ) 00073 element = (U8 *)(&(seq->seqStored)); mReverseBytes( element, sizeof(UINT) ) 00074 } 00075 Fwrite( sizes, sizeof(UINT), numberOfSizes, fp ); 00076 Fwrite( seq->seq, seq->elementSize, seq->seqStored, fp ); 00077 if( endiannessOfThisMachine != endiannessOnWrite ){ 00078 for( i = 0; i < numberOfSizes; i++ ){ 00079 element = (U8 *)(sizes + i); mReverseBytes( element, sizeof(UINT) ) 00080 } 00081 offset = 0; 00082 for( j = 0; j < numberOfSizes; j++ ){ 00083 if( sizes[j] != 1 ){ 00084 for( i = 0; i < seq->seqStored; i++ ){ 00085 element = (U8 *)mVoidSequenceElement( seq, i ); 00086 element = element + offset; 00087 mReverseBytes( element, sizes[j] ) 00088 } 00089 } 00090 offset += sizes[j]; 00091 } 00092 } 00093 } 00094 00095 VoidSequence * VoidSequenceRead( FILE * fp ){ 00096 UINT i, j, offset, endianness, numberOfSizes, elementSize, seqStored; 00097 U8 * element; 00098 UINT * sizes; 00099 VoidSequence * seq; 00100 00101 mReadEndianness( fp, endianness ) 00102 Fread( &numberOfSizes, sizeof(UINT), 1, fp ); 00103 Fread( &elementSize, sizeof(UINT), 1, fp ); 00104 Fread( &seqStored, sizeof(UINT), 1, fp ); 00105 if( endiannessOfThisMachine != endianness ){ 00106 element = (U8 *)(&numberOfSizes); mReverseBytes( element, sizeof(UINT) ) 00107 element = (U8 *)(&elementSize); mReverseBytes( element, sizeof(UINT) ) 00108 element = (U8 *)(&seqStored); mReverseBytes( element, sizeof(UINT) ) 00109 } 00110 00111 sizes = Malloc( numberOfSizes, sizeof(UINT) ); 00112 Fread( sizes, sizeof(UINT), numberOfSizes, fp ); 00113 seq = VoidSequenceInit2( elementSize, seqStored, 1 ); 00114 Fread( seq->seq, elementSize, seqStored, fp ); 00115 seq->seqStored = seqStored; 00116 if( endiannessOfThisMachine != endianness ){ 00117 for( i = 0; i < numberOfSizes; i++ ){ 00118 element = (U8 *)(sizes + i); mReverseBytes( element, sizeof(UINT) ) 00119 } 00120 offset = 0; 00121 for( j = 0; j < numberOfSizes; j++ ){ 00122 if( sizes[j] != 1 ){ 00123 for( i = 0; i < seq->seqStored; i++ ){ 00124 element = (U8 *)mVoidSequenceElement( seq, i ); 00125 element = element + offset; 00126 mReverseBytes( element, sizes[j] ) 00127 } 00128 } 00129 offset += sizes[j]; 00130 } 00131 } 00132 Free( sizes ); 00133 return seq; 00134 } 00135 00136 void VoidSequenceShrink( VoidSequence * seq ){ 00137 seq->seq = Realloc( seq->seq, seq->seqStored, seq->elementSize ); 00138 seq->seqAlloced = seq->seqStored; 00139 } 00140 00141 void VoidSequenceSet( VoidSequence * seq, UINT position, const void * element ){ 00142 memcpy( seq->seq + (position*seq->elementSize), element, seq->elementSize ); 00143 } 00144 00145 void VoidSequenceAppend( VoidSequence * seq, const VoidSequence * s ){ 00146 UINT i; 00147 00148 for( i = 0; i < s->seqStored; i++ ){ 00149 VoidSequenceAdd( seq, mVoidSequenceElement(s, i) ); 00150 } 00151 } 00152 00153 void VoidSequenceCpy( VoidSequence * dest, const VoidSequence * src ){ 00154 dest->seqStored = 0; 00155 VoidSequenceAppend( dest, src ); 00156 } 00157 00158 #define UTF8_6 (0xFC) 00159 #define UTF8_5 (0xF8) 00160 #define UTF8_4 (0xF0) 00161 #define UTF8_3 (0xE0) 00162 #define UTF8_2 (0xC0) 00163 #define UTF8_1 (0x7F) 00164 00165 static UINT Read( FILE * fp, void * symbol, UINT symbolSize, UINT encoding ){ 00166 UINT i, nBytes, ch; 00167 U8 u8; 00168 00169 if( encoding == ENCODING_PLAIN ){ 00170 if( fread( symbol, symbolSize, 1, fp ) != 1 ){ 00171 if( feof(fp) ){ 00172 return 0; 00173 } 00174 Throw( "Cannot read from file: fread failed." ); 00175 } 00176 return symbolSize; 00177 } 00178 while( bTRUE ){ 00179 nBytes = 0; 00180 if( fread( &u8, sizeof(U8), 1, fp ) != 1 ){ 00181 if( feof(fp) ){ 00182 return 0; 00183 } 00184 Throw( "Cannot read from file: fread failed." ); 00185 } 00186 if( u8 == 0xFE || u8 == 0xFF ){ 00187 continue; 00188 } 00189 else if( (u8 & UTF8_6) == UTF8_6 ){ 00190 nBytes = 6; 00191 ch = u8 & 0x01; 00192 break; 00193 } 00194 else if( (u8 & UTF8_5) == UTF8_5 ){ 00195 nBytes = 5; 00196 ch = u8 & 0x03; 00197 break; 00198 } 00199 else if( (u8 & UTF8_4) == UTF8_4 ){ 00200 nBytes = 4; 00201 ch = u8 & 0x07; 00202 break; 00203 } 00204 else if( (u8 & UTF8_3) == UTF8_3 ){ 00205 nBytes = 3; 00206 ch = u8 & 0x0F; 00207 break; 00208 } 00209 else if( (u8 & UTF8_2) == UTF8_2 ){ 00210 nBytes = 2; 00211 ch = u8 & 0x1F; 00212 break; 00213 } 00214 else if( (u8 | UTF8_1) == UTF8_1 ){ 00215 nBytes = 1; 00216 ch = u8 & 0x7F; 00217 break; 00218 } 00219 else{ 00220 S8 msg[MAX_INPUT_STRING_SIZE]; 00221 sprintf( msg, "Illegal UTF-8 symbol that starts with 0x%2x", u8 ); 00222 Throw( msg ); 00223 } 00224 } 00225 if( nBytes > symbolSize ){ 00226 S8 msg[MAX_INPUT_STRING_SIZE]; 00227 sprintf( msg, "%llu-byte UTF-8 symbol", (U64)(nBytes) ); 00228 Throw( msg ); 00229 } 00230 for( i = 1; i < nBytes; i++ ){ 00231 if( fread( &u8, sizeof(U8), 1, fp ) != 1 ){ 00232 if( feof(fp) ){ 00233 S8 msg[MAX_INPUT_STRING_SIZE]; 00234 sprintf( msg, "Illegal UTF-8 %lld-byte symbol at the end of a file", (U64)(nBytes) ); 00235 Throw( msg ); 00236 } 00237 Throw( "Cannot read from file: fread failed." ); 00238 } 00239 if( (u8 & 0x80) != 0x80 || (u8 | 0xBF) != 0xBF ){ 00240 S8 msg[MAX_INPUT_STRING_SIZE]; 00241 sprintf( msg, "Illegal UTF-8 %llu-byte symbol that contains 0x%2x", (U64)(nBytes), u8 ); 00242 Throw( msg ); 00243 } 00244 ch <<= 6; 00245 00246 ch |= (u8 & 0x3F); 00247 } 00248 UINTToSymbol( ch, symbol, symbolSize ); 00249 return nBytes; 00250 } 00251 00252 static boolean ReadSymbol( FILE * fp, void * symbol, UINT symbolSize, UINT encoding ){ 00253 UINT number, nBytes; 00254 00255 nBytes = Read( fp, symbol, symbolSize, encoding ); 00256 if( nBytes == 0 ){ 00257 return bFALSE; 00258 } 00259 number = SymbolToUINT(symbol, symbolSize); 00260 if( number == 10 ){ 00261 return bTRUE; 00262 } 00263 if( number == 13 ){ 00264 mSymbolAndVariables( nextSymbol ) 00265 mSymbolAssignValue( nextSymbol, 0, symbolSize ) 00266 UINTToSymbol( 10, symbol, symbolSize ); 00267 nBytes = Read( fp, nextSymbol, symbolSize, encoding ); 00268 if( nBytes == 0 ){ 00269 return bTRUE; 00270 } 00271 if( SymbolToUINT(nextSymbol, symbolSize) != 10 ){ 00272 Fseek( fp, -((SINT)(nBytes)), SEEK_CUR ); 00273 } 00274 } 00275 return( bTRUE ); 00276 } 00277 00278 VoidSequence * VoidSequenceReadLine( FILE * fp, UINT symbolSize, UINT encoding ){ 00279 VoidSequence * seq; 00280 00281 mSymbolAndVariables( symbol ) 00282 mSymbolAssignValue( symbol, 0, symbolSize ) 00283 seq = VoidSequenceInit( symbolSize ); 00284 while( bTRUE ){ 00285 if( !ReadSymbol( fp, symbol, symbolSize, encoding ) ){ 00286 if( seq->seqStored == 0 ){ 00287 VoidSequenceFree( seq ); 00288 return NULL; 00289 } 00290 break; 00291 } 00292 if( SymbolToUINT( symbol, symbolSize ) == 10 ){ 00293 break; 00294 } 00295 VoidSequenceAdd( seq, symbol ); 00296 } 00297 return seq; 00298 } 00299 00300 void VoidSequenceReverse( VoidSequence * seq ){ 00301 UINT i, m, k; 00302 void * tmp; 00303 00304 tmp = Malloc( 1, seq->elementSize ); 00305 m = seq->seqStored/2; 00306 for( i = 0; i < m; i++ ){ 00307 k = seq->seqStored - 1 - i; 00308 memcpy( tmp, mVoidSequenceElement(seq, i), seq->elementSize ); 00309 memcpy( mVoidSequenceElement(seq, i), mVoidSequenceElement(seq, k), seq->elementSize ); 00310 memcpy( mVoidSequenceElement(seq, k), tmp, seq->elementSize ); 00311 } 00312 Free( tmp ); 00313 } 00314 00315 void VoidSequenceTrim( VoidSequence * seq ){ 00316 UINT s, e, i; 00317 00318 for( s = 0; s < seq->seqStored && SymbolToUINT( mVoidSequenceElement(seq, s), seq->elementSize ) <= 32; s++ ); 00319 if( s == seq->seqStored ){ 00320 return; 00321 } 00322 for( e = seq->seqStored; e > 0 && SymbolToUINT( mVoidSequenceElement(seq, e-1), seq->elementSize ) <= 32; e-- ); 00323 if( s == 0 ){ 00324 seq->seqStored = e; 00325 return; 00326 } 00327 for( i = 0; s < e; s++, i++ ){ 00328 VoidSequenceSet( seq, i, mVoidSequenceElement(seq, s) ); 00329 } 00330 seq->seqStored = i; 00331 }