SIS
Symmetric Index Structures
/Users/dbr/ma/src/bas/lml/voidSequence.c
Go to the documentation of this file.
00001 #include "base.h"
00002 
00003 VoidSequence * VoidSequenceInit( UINT elementSize ){
00004     return VoidSequenceInit2( elementSize, 64, -1 );
00005 }
00006 
00007 VoidSequence * VoidSequenceInit2( UINT elementSize, UINT seqAlloced, SINT growth ){
00008     VoidSequence * seq;
00009 
00010     seq = (VoidSequence *)Malloc( 1, sizeof(VoidSequence) );
00011     seq->elementSize = elementSize;
00012     seq->seqStored = 0;
00013     seq->seqAlloced = seqAlloced;
00014     seq->seq = Malloc( seqAlloced, elementSize );
00015     seq->growth = growth;
00016     return seq;
00017 }
00018 
00019 void VoidSequenceFree( VoidSequence * seq ){
00020     Free( seq->seq );
00021     Free( seq );
00022 }
00023 
00024 void VoidSequenceAdd( VoidSequence * seq, const void * elementToAdd ){
00025     if( seq->seqStored == seq->seqAlloced ){
00026         UINT mem;
00027         if( seq->growth < 0 ){
00028             mem = seq->seqAlloced + seq->seqAlloced / ((UINT)(-seq->growth));
00029         }
00030         else{
00031             mem = seq->seqAlloced + seq->growth;
00032         }
00033         if( mem <= seq->seqAlloced ){
00034             mem++;
00035         }
00036         seq->seq = Realloc( seq->seq, mem, seq->elementSize );
00037         seq->seqAlloced = mem;
00038     }
00039     VoidSequenceSet( seq, seq->seqStored, elementToAdd );
00040     seq->seqStored++;
00041 }
00042 
00043 void VoidSequenceWrite( VoidSequence * seq, FILE * fp, UINT * sizes, UINT numberOfSizes ){
00044     UINT i, j, offset;
00045     U8 * element;
00046 
00047     if( endiannessOfThisMachine != endiannessOnWrite ){
00048         offset = 0;
00049         for( j = 0; j < numberOfSizes; j++ ){
00050             if( sizes[j] != 1 ){
00051                 for( i = 0; i < seq->seqStored; i++ ){
00052                     element = (U8 *)mVoidSequenceElement( seq, i );
00053                     element = element + offset;
00054                     mReverseBytes( element, sizes[j] )
00055                 }
00056             }
00057             offset += sizes[j];
00058         }
00059         for( i = 0; i < numberOfSizes; i++ ){
00060             element = (U8 *)(sizes + i); mReverseBytes( element, sizeof(UINT) )
00061         }
00062         element = (U8 *)(&numberOfSizes); mReverseBytes( element, sizeof(UINT) )
00063         element = (U8 *)(&(seq->elementSize)); mReverseBytes( element, sizeof(UINT) )
00064         element = (U8 *)(&(seq->seqStored)); mReverseBytes( element, sizeof(UINT) )
00065     }
00066     mWriteEndianness( fp, endiannessOnWrite )
00067     Fwrite( &numberOfSizes, sizeof(UINT), 1, fp );
00068     Fwrite( &(seq->elementSize), sizeof(UINT), 1, fp );
00069     Fwrite( &(seq->seqStored), sizeof(UINT), 1, fp );
00070     if( endiannessOfThisMachine != endiannessOnWrite ){
00071         element = (U8 *)(&numberOfSizes); mReverseBytes( element, sizeof(UINT) )
00072         element = (U8 *)(&(seq->elementSize)); mReverseBytes( element, sizeof(UINT) )
00073         element = (U8 *)(&(seq->seqStored)); mReverseBytes( element, sizeof(UINT) )
00074     }
00075     Fwrite( sizes, sizeof(UINT), numberOfSizes, fp );
00076     Fwrite( seq->seq, seq->elementSize, seq->seqStored, fp );
00077     if( endiannessOfThisMachine != endiannessOnWrite ){
00078         for( i = 0; i < numberOfSizes; i++ ){
00079             element = (U8 *)(sizes + i); mReverseBytes( element, sizeof(UINT) )
00080         }
00081         offset = 0;
00082         for( j = 0; j < numberOfSizes; j++ ){
00083             if( sizes[j] != 1 ){
00084                 for( i = 0; i < seq->seqStored; i++ ){
00085                     element = (U8 *)mVoidSequenceElement( seq, i );
00086                     element = element + offset;
00087                     mReverseBytes( element, sizes[j] )
00088                 }
00089             }
00090             offset += sizes[j];
00091         }
00092     }
00093 }
00094 
00095 VoidSequence * VoidSequenceRead( FILE * fp ){
00096     UINT i, j, offset, endianness, numberOfSizes, elementSize, seqStored;
00097     U8 * element;
00098     UINT * sizes;
00099     VoidSequence * seq;
00100 
00101     mReadEndianness( fp, endianness )
00102     Fread( &numberOfSizes, sizeof(UINT), 1, fp );
00103     Fread( &elementSize, sizeof(UINT), 1, fp );
00104     Fread( &seqStored, sizeof(UINT), 1, fp );
00105     if( endiannessOfThisMachine != endianness ){
00106         element = (U8 *)(&numberOfSizes); mReverseBytes( element, sizeof(UINT) )
00107         element = (U8 *)(&elementSize); mReverseBytes( element, sizeof(UINT) )
00108         element = (U8 *)(&seqStored); mReverseBytes( element, sizeof(UINT) )
00109     }
00110 
00111     sizes = Malloc( numberOfSizes, sizeof(UINT) );
00112     Fread( sizes, sizeof(UINT), numberOfSizes, fp );
00113     seq = VoidSequenceInit2( elementSize, seqStored, 1 );
00114     Fread( seq->seq, elementSize, seqStored, fp );
00115     seq->seqStored = seqStored;
00116     if( endiannessOfThisMachine != endianness ){
00117         for( i = 0; i < numberOfSizes; i++ ){
00118             element = (U8 *)(sizes + i); mReverseBytes( element, sizeof(UINT) )
00119         }
00120         offset = 0;
00121         for( j = 0; j < numberOfSizes; j++ ){
00122             if( sizes[j] != 1 ){
00123                 for( i = 0; i < seq->seqStored; i++ ){
00124                     element = (U8 *)mVoidSequenceElement( seq, i );
00125                     element = element + offset;
00126                     mReverseBytes( element, sizes[j] )
00127                 }
00128             }
00129             offset += sizes[j];
00130         }
00131     }
00132     Free( sizes );
00133     return seq;
00134 }
00135 
00136 void VoidSequenceShrink( VoidSequence * seq ){
00137     seq->seq = Realloc( seq->seq, seq->seqStored, seq->elementSize );
00138     seq->seqAlloced = seq->seqStored;
00139 }
00140 
00141 void VoidSequenceSet( VoidSequence * seq, UINT position, const void * element ){
00142     memcpy( seq->seq + (position*seq->elementSize), element, seq->elementSize );
00143 }
00144 
00145 void VoidSequenceAppend( VoidSequence * seq, const VoidSequence * s ){
00146     UINT i;
00147 
00148     for( i = 0; i < s->seqStored; i++ ){
00149         VoidSequenceAdd( seq, mVoidSequenceElement(s, i) );
00150     }
00151 }
00152 
00153 void VoidSequenceCpy( VoidSequence * dest, const VoidSequence * src ){
00154     dest->seqStored = 0;
00155     VoidSequenceAppend( dest, src );
00156 }
00157 
00158 #define UTF8_6 (0xFC)
00159 #define UTF8_5 (0xF8)
00160 #define UTF8_4 (0xF0)
00161 #define UTF8_3 (0xE0)
00162 #define UTF8_2 (0xC0)
00163 #define UTF8_1 (0x7F)
00164 
00165 static UINT Read( FILE * fp, void * symbol, UINT symbolSize, UINT encoding ){
00166     UINT i, nBytes, ch;
00167     U8 u8;
00168 
00169     if( encoding == ENCODING_PLAIN ){
00170         if( fread( symbol, symbolSize, 1, fp ) != 1 ){
00171             if( feof(fp) ){
00172                 return 0;
00173             }
00174             Throw( "Cannot read from file: fread failed." );
00175         }
00176         return symbolSize;
00177     }
00178     while( bTRUE ){
00179         nBytes = 0;
00180         if( fread( &u8, sizeof(U8), 1, fp ) != 1 ){
00181             if( feof(fp) ){
00182                 return 0;
00183             }
00184             Throw( "Cannot read from file: fread failed." );
00185         }
00186         if( u8 == 0xFE || u8 == 0xFF ){
00187             continue;
00188         }
00189         else if( (u8 & UTF8_6) == UTF8_6 ){
00190             nBytes = 6;
00191             ch = u8 & 0x01;
00192             break;
00193         }
00194         else if( (u8 & UTF8_5) == UTF8_5 ){
00195             nBytes = 5;
00196             ch = u8 & 0x03;
00197             break;
00198         }
00199         else if( (u8 & UTF8_4) == UTF8_4 ){
00200             nBytes = 4;
00201             ch = u8 & 0x07;
00202             break;
00203         }
00204         else if( (u8 & UTF8_3) == UTF8_3 ){
00205             nBytes = 3;
00206             ch = u8 & 0x0F;
00207             break;
00208         }
00209         else if( (u8 & UTF8_2) == UTF8_2 ){
00210             nBytes = 2;
00211             ch = u8 & 0x1F;
00212             break;
00213         }
00214         else if( (u8 | UTF8_1) == UTF8_1 ){
00215             nBytes = 1;
00216             ch = u8 & 0x7F;
00217             break;
00218         }
00219         else{
00220             S8 msg[MAX_INPUT_STRING_SIZE];
00221             sprintf( msg, "Illegal UTF-8 symbol that starts with 0x%2x", u8 );
00222             Throw( msg );
00223         }
00224     }
00225     if( nBytes > symbolSize ){
00226         S8 msg[MAX_INPUT_STRING_SIZE];
00227         sprintf( msg, "%llu-byte UTF-8 symbol", (U64)(nBytes) );
00228         Throw( msg );
00229     }
00230     for( i = 1; i < nBytes; i++ ){
00231         if( fread( &u8, sizeof(U8), 1, fp ) != 1 ){
00232             if( feof(fp) ){
00233                 S8 msg[MAX_INPUT_STRING_SIZE];
00234                 sprintf( msg, "Illegal UTF-8 %lld-byte symbol at the end of a file", (U64)(nBytes) );
00235                 Throw( msg );
00236             }
00237             Throw( "Cannot read from file: fread failed." );
00238         }
00239         if( (u8 & 0x80) != 0x80 || (u8 | 0xBF) != 0xBF ){
00240             S8 msg[MAX_INPUT_STRING_SIZE];
00241             sprintf( msg, "Illegal UTF-8 %llu-byte symbol that contains 0x%2x", (U64)(nBytes), u8 );
00242             Throw( msg );
00243         }
00244         ch <<= 6;
00245 
00246         ch |= (u8 & 0x3F);
00247     }
00248     UINTToSymbol( ch, symbol, symbolSize );
00249     return nBytes;
00250 }
00251 
00252 static boolean ReadSymbol( FILE * fp, void * symbol, UINT symbolSize, UINT encoding ){
00253     UINT number, nBytes;
00254 
00255     nBytes = Read( fp, symbol, symbolSize, encoding );
00256     if( nBytes == 0 ){
00257         return bFALSE;
00258     }
00259     number = SymbolToUINT(symbol, symbolSize);
00260     if( number == 10 ){
00261         return bTRUE;
00262     }
00263     if( number == 13 ){
00264         mSymbolAndVariables( nextSymbol )
00265         mSymbolAssignValue( nextSymbol, 0, symbolSize )
00266         UINTToSymbol( 10, symbol, symbolSize );
00267         nBytes = Read( fp, nextSymbol, symbolSize, encoding );
00268         if( nBytes == 0 ){
00269             return bTRUE;
00270         }
00271         if( SymbolToUINT(nextSymbol, symbolSize) != 10 ){
00272             Fseek( fp, -((SINT)(nBytes)), SEEK_CUR );
00273         }
00274     }
00275     return( bTRUE );
00276 }
00277 
00278 VoidSequence * VoidSequenceReadLine( FILE * fp, UINT symbolSize, UINT encoding ){
00279     VoidSequence * seq;
00280 
00281     mSymbolAndVariables( symbol )
00282     mSymbolAssignValue( symbol, 0, symbolSize )
00283     seq = VoidSequenceInit( symbolSize );
00284     while( bTRUE ){
00285         if( !ReadSymbol( fp, symbol, symbolSize, encoding ) ){
00286             if( seq->seqStored == 0 ){
00287                 VoidSequenceFree( seq );
00288                 return NULL;
00289             }
00290             break;
00291         }
00292         if( SymbolToUINT( symbol, symbolSize ) == 10 ){
00293             break;
00294         }
00295         VoidSequenceAdd( seq, symbol );
00296     }
00297     return seq;
00298 }
00299 
00300 void VoidSequenceReverse( VoidSequence * seq ){
00301     UINT i, m, k;
00302     void * tmp;
00303 
00304     tmp = Malloc( 1, seq->elementSize );
00305     m = seq->seqStored/2;
00306     for( i = 0; i < m; i++ ){
00307         k = seq->seqStored - 1 - i;
00308         memcpy( tmp, mVoidSequenceElement(seq, i), seq->elementSize );
00309         memcpy( mVoidSequenceElement(seq, i), mVoidSequenceElement(seq, k), seq->elementSize );
00310         memcpy( mVoidSequenceElement(seq, k), tmp, seq->elementSize );
00311     }
00312     Free( tmp );
00313 }
00314 
00315 void VoidSequenceTrim( VoidSequence * seq ){
00316     UINT s, e, i;
00317 
00318     for( s = 0; s < seq->seqStored && SymbolToUINT( mVoidSequenceElement(seq, s), seq->elementSize ) <= 32; s++ );
00319     if( s == seq->seqStored ){
00320         return;
00321     }
00322     for( e = seq->seqStored; e > 0 && SymbolToUINT( mVoidSequenceElement(seq, e-1), seq->elementSize ) <= 32; e-- );
00323     if( s == 0 ){
00324         seq->seqStored = e;
00325         return;
00326     }
00327     for( i = 0; s < e; s++, i++ ){
00328         VoidSequenceSet( seq, i, mVoidSequenceElement(seq, s) );
00329     }
00330     seq->seqStored = i;
00331 }