SIS
Symmetric Index Structures
|
00001 #ifndef SCDAWG_C 00002 #define SCDAWG_C 00003 00004 #include "base.h" 00005 00006 #define mRight (help->aut->right) 00007 #define mLeft (help->aut->left) 00008 00009 SCDAWG * SCDAWGInit( UINT symbolSize ){ 00010 SCDAWG * aut; 00011 00012 aut = Malloc( 1, sizeof(SCDAWG) ); 00013 aut->right = CompressedAutomatonInit( symbolSize ); 00014 aut->left = CompressedAutomatonInit( symbolSize ); 00015 return aut; 00016 } 00017 00018 void SCDAWGFree( SCDAWG * aut ){ 00019 CompressedAutomatonFree( aut->right ); 00020 CompressedAutomatonFree( aut->left ); 00021 Free( aut ); 00022 } 00023 00024 void SCDAWGClose( SCDAWGBuildHelp * help ){ 00025 UINT i, j, s; 00026 00027 CompressedAutomatonSortTransitions( mRight, help->right->transitionsFrom ); 00028 for( i = 0; i < mCompressedAutomatonGetTransitionsStored(mLeft); i++ ){ 00029 s = mLeft->transitionsTo->seq[i]; 00030 j = mLeft->statesEnd->seq[s] - mLeft->transitionsStart->seq[i]; 00031 s = help->leftToRight->seq[s]; 00032 mLeft->transitionsStart->seq[i] = (mRight->statesEnd->seq[s] + 1) - help->right->statesLength->seq[s] + j; 00033 } 00034 for( i = 0; i < mCompressedAutomatonGetTransitionsStored(mLeft); i++ ){ 00035 help->left->transitionsFrom->seq[i] = help->leftToRight->seq[help->left->transitionsFrom->seq[i]]; 00036 mLeft->transitionsTo->seq[i] = help->leftToRight->seq[mLeft->transitionsTo->seq[i]]; 00037 } 00038 for( i = 0; i < mCompressedAutomatonGetStatesStored(mLeft); i++ ){ 00039 if( help->right->statesLength->seq[i] != 0 ){ 00040 mLeft->statesEnd->seq[i] = (mRight->statesEnd->seq[i] + 1) - help->right->statesLength->seq[i]; 00041 } 00042 } 00043 VoidSequenceFree( mLeft->data ); mLeft->data = mRight->data; 00044 CompressedAutomatonSortTransitions( mLeft, help->left->transitionsFrom ); 00045 mLeft->data = NULL; 00046 } 00047 00048 void SCDAWGShrink( SCDAWG * aut ){ 00049 CompressedAutomatonShrink( aut->right ); 00050 CompressedAutomatonShrink( aut->left ); 00051 } 00052 00053 void SCDAWGAddTarjanTable( SCDAWG * aut ){ 00054 CompressedAutomatonAddTarjanTable( aut->right ); 00055 aut->left->data = aut->right->data; 00056 CompressedAutomatonAddTarjanTable( aut->left ); 00057 aut->left->data = NULL; 00058 } 00059 00060 void SCDAWGWrite( SCDAWG * aut, FILE * fp ){ 00061 CompressedAutomatonWrite( aut->right, fp ); 00062 CompressedAutomatonWrite( aut->left, fp ); 00063 } 00064 00065 SCDAWG * SCDAWGRead( FILE * fp ){ 00066 SCDAWG * aut; 00067 00068 aut = (SCDAWG *)Malloc( 1, sizeof(SCDAWG) ); 00069 aut->right = CompressedAutomatonRead( fp ); 00070 aut->left = CompressedAutomatonRead( fp ); 00071 return aut; 00072 } 00073 00074 void SCDAWGDumpStat( const SCDAWG * aut, FILE * fp ){ 00075 fprintf( fp, "right automaton:\n" ); 00076 CompressedAutomatonDumpStat( aut->right, fp ); 00077 fprintf( fp, "left automaton:\n" ); 00078 CompressedAutomatonDumpStat( aut->left, fp ); 00079 fprintf( fp, "total number of transitions: %llu\n", 00080 (U64)(mCompressedAutomatonGetTransitionsStored(aut->right) + mCompressedAutomatonGetTransitionsStored(aut->left)) ); 00081 } 00082 00083 void SCDAWGDumpGV( SCDAWG * aut, FILE * fp, void (*DumpLabel)(const SCDAWG *, FILE *, UINT, boolean, UINT), UINT encoding ){ 00084 S8 tmp[MAX_INPUT_STRING_SIZE]; 00085 UINT i, s, tr, statesStored, symbolSize; 00086 00087 symbolSize = aut->right->data->elementSize; 00088 PrintString( fp, "Digraph A{ \n", symbolSize, encoding ); 00089 PrintString( fp, "rankdir=LR;\n", symbolSize, encoding ); 00090 statesStored = mCompressedAutomatonGetStatesStored( aut->right ); 00091 PrintString( fp, "node[shape = point, color = white]; \"\";\n", symbolSize, encoding ); 00092 PrintString( fp, "node [shape = circle, color = black];\n", symbolSize, encoding ); 00093 sprintf( tmp, "\"\" -> %llu;\n", (U64)(aut->right->initialState) ); 00094 PrintString( fp, tmp, symbolSize, encoding ); 00095 for( s = 0; s < statesStored; s++ ){ 00096 for( i = 0; i < aut->right->statesNumberOfTransitions->seq[s]; i++ ){ 00097 tr = aut->right->statesTransitions->seq[s] + i; 00098 sprintf( tmp, "%llu -> %llu [ label = \"", (U64)(s), (U64)(aut->right->transitionsTo->seq[tr]) ); 00099 PrintString( fp, tmp, symbolSize, encoding ); 00100 DumpLabel( aut, fp, tr, bTRUE, encoding ); 00101 PrintString( fp, "\" ];\n", symbolSize, encoding ); 00102 } 00103 for( i = 0; i < aut->left->statesNumberOfTransitions->seq[s]; i++ ){ 00104 tr = aut->left->statesTransitions->seq[s] + i; 00105 sprintf( tmp, "%llu -> %llu [ style = dashed, label = \"", (U64)(s), (U64)(aut->left->transitionsTo->seq[tr]) ); 00106 PrintString( fp, tmp, symbolSize, encoding ); 00107 DumpLabel( aut, fp, tr, bFALSE, encoding ); 00108 PrintString( fp, "\" ];\n", symbolSize, encoding ); 00109 } 00110 } 00111 PrintString( fp, "}\n", symbolSize, encoding ); 00112 } 00113 00114 SCDAWGBuildHelp * SCDAWGBuildHelpInit( SCDAWG * aut ){ 00115 SCDAWGBuildHelp * help; 00116 00117 help = Malloc( 1, sizeof(SCDAWGBuildHelp) ); 00118 help->aut = aut; 00119 help->left = CDAWGBuildHelpInit( aut->left ); 00120 help->right = CDAWGBuildHelpInit( aut->right ); 00121 help->leftToRight = UINTSequenceInit(); 00122 return help; 00123 } 00124 00125 void SCDAWGBuildHelpFree( SCDAWGBuildHelp * help ){ 00126 CDAWGBuildHelpFree( help->left ); 00127 CDAWGBuildHelpFree( help->right ); 00128 UINTSequenceFree( help->leftToRight ); 00129 Free( help ); 00130 } 00131 00132 UINT FindRightState( SCDAWGBuildHelp * help, UINT l ){ 00133 TransitionFromStartTo tfst; 00134 UINT q, tr; 00135 00136 q = help->left->statesSuffixLink->seq[l]; 00137 if( help->leftToRight->seq[q] == NO ){ 00138 help->leftToRight->seq[q] = FindRightState( help, q ); 00139 } 00140 tfst.from = help->leftToRight->seq[q]; 00141 tfst.symbol = mVoidSequenceElement( mLeft->data, mLeft->statesEnd->seq[l] - help->left->statesLength->seq[q] ); 00142 tr = HashGetIndex( help->right->hash, &tfst ); 00143 return mRight->transitionsTo->seq[tr]; 00144 } 00145 00146 void SCDAWGAdd( SCDAWGBuildHelp * help, VoidSequence * sharpDocumentDollar ){ 00147 UINT i, numberOfStates, numberOfNewStates; 00148 00149 numberOfStates = mCompressedAutomatonGetStatesStored( help->left->aut ); 00150 CDAWGAdd( help->right, sharpDocumentDollar ); 00151 VoidSequenceReverse( sharpDocumentDollar ); 00152 CDAWGAdd( help->left, sharpDocumentDollar ); 00153 00154 numberOfNewStates = mCompressedAutomatonGetStatesStored( help->left->aut ); 00155 for( i = numberOfStates; i < numberOfNewStates; i++ ){ 00156 UINTSequenceAdd( help->leftToRight, NO ); 00157 } 00158 help->leftToRight->seq[help->left->aut->initialState] = help->right->aut->initialState; 00159 for( i = numberOfStates; i < numberOfNewStates; i++ ){ 00160 if( help->leftToRight->seq[i] == NO ){ 00161 help->leftToRight->seq[i] = FindRightState( help, i ); 00162 } 00163 } 00164 } 00165 00166 #endif /* end of include guard: SCDAWG_C */