In dieser Aufgabe wird das Daciuk-Algorithmus implementiert. Um den Daciuk-Algorithmus zu implementieren, solltest Du die Teile, die als TODO in einem Codeblock markiert werden, vervollständigen.
from itertools import count # used to create a generator to generate an id for each state.
from collections import OrderedDict
import graphviz
'''
Create a class MinDict to implement the Daciuk-Algorithm.
'''
class MinDict():
def __init__(self, words):
self.final_states : set[int] = set() # a set used to store all final states
self.curr_id = count() # create an id generator: next_id = next(curr_id)
self.start = next(self.curr_id) # the start state
self.register : set[int] = set() # a set used to store all registered states, which are inequal to each other
self.tr : dict[int, OrderedDict[str, int]] = {self.start:OrderedDict()} # a dictionary used to store all states and their transitions
self.compile(words) # compile all functions to build the final minimal dictionary.
self.language = list() # init a list used to store the words acquired via traversal
# This methode is used to calculate the common prefix of two given words.
def common_prefix(self, word, pre_word):
"""
@param word: currently read-in word
@parm pre_word: previously read-in word
return: the common prefix of both words
"""
common_prefix = '' # init the common prefix
# TODO: Return the common prefix of two words.
# pass
for i, c in enumerate(word):
if i < len(pre_word):
if c == pre_word[i]:
common_prefix += c
else:
return common_prefix
else:
return common_prefix
def split_state(self, common_prefix):
"""
@param common_predfix: a substring, the common prefix
return: the split state
"""
split_state = self.start
if common_prefix == '':
return split_state
else:
for i, c in enumerate(common_prefix):
split_state = self.tr[split_state][c]
return split_state
def has_children(self, state):
"""
@param: state id
return: boolean value, if the given state has children or not
"""
# TODO: Return True if the state has children, otherwise False.
# Hint: That self.has_children(state) is true means self.tr[state] is not an empty OrderedDict.
return self.tr[state]
def is_equal(self, s1, s2):
"""
@param: two state ids
return: boolean value, if the given states are equal
"""
# TODO: If two states s1 and s2 are equal, return True, otherwise False.
# Hint: Two conditions need to be satisfied if s1 and s2 are equal:
# a. Both s1 and s2 are final or non-final states.
# b. The transitions of s1 and s2 are the same.
return (s1 in self.final_states) == (s2 in self.final_states) \
and self.tr[s1] == self.tr[s2]
def replace_or_register(self, state):
"""
@param: a state
"""
last_child = self.tr[state][next(reversed(self.tr[state]))] # get the last child state from the state transition.
if self.has_children(last_child):
self.replace_or_register(last_child)
# replace and delete the last child state if it has an equal state in the set of the registered states.
for s in self.register:
if self.is_equal(last_child, s):
# TODO: Replace the last child with the found equal state in th registered set.
self.tr[state][next(reversed(self.tr[state]))] = s
del self.tr[last_child] # delete the last child state
return
# add the last child into the register set if no equal states are found
self.register.add(last_child)
'''
This function is used to add the current suffix after the split state
@param suffix: a substring
@param state: state id
'''
def add_suffix(self, suffix, state):
for c in suffix:
# TODO: Use the count generator self.curr_id to create the id of next state. (Replace None with your code.)
next_state = next(self.curr_id)
if state not in self.tr:
self.tr[state] = OrderedDict()
self.tr[state][c] = next_state
state = next_state
self.tr[state] = OrderedDict()
# TODO: Add the last state to the set of final states.
self.final_states.add(state)
def compile(self, words):
"""
@param: a sorted list of words
gather all parts together to implement the Daciuk-Algorithm.
"""
pre_word = ''
for word in words:
common_prefix = self.common_prefix(word, pre_word)
split_state = self.split_state(common_prefix)
current_suffix = word[len(common_prefix):]
if self.has_children(split_state):
self.replace_or_register(split_state)
self.add_suffix(current_suffix, split_state)
pre_word = word
self.replace_or_register(self.start)
self.register.add(self.start)
# define a function with the help of graphviz to draw an automoton.
def draw_automaton(states, final_states, initial_state, delta):
g = graphviz.Digraph('MinDict')
for state in states:
if state in final_states:
g.attr('node',style='bold')
if state == initial_state:
g.node(str(state),labels = '->' + str(state))
else:
g.node(str(state))
g.attr('node',style='solid')
for x, label, z in delta:
g.edge(str(x),str(z),label=' '+label+' ')
return g
def main():
dictionary = ["acker", "alle", "alraune", "as", "aspekt"]
min_dict = MinDict(dictionary) # Instantiate a minimal dictionary.
# retrieve the elements from min_dict to formalize a 5-tuple for an automoton.
start = str(0) # start state
states = {str(i) for i in min_dict.register} # set of all states
final_states = {str(i) for i in min_dict.final_states if i in min_dict.register} # set of final states
delta = set() # transition function
for p in min_dict.tr:
if p in min_dict.register:
for c in min_dict.tr[p]:
q = min_dict.tr[p][c]
delta.add((str(p), c, str(q)))
# draw the minimal dictionary
g = draw_automaton(states, final_states, start, delta)
g.render('aut1.gv', view=True)
if __name__ == '__main__':
main()
from collections import OrderedDict
import graphviz
class State():
def __init__(self):
self.is_final = False # set non-final state as default
self.tr = OrderedDict() # initialize state transitions with empty OrderedDict
self.id = None
# overload the operator "=="
def __eq__(self, other):
return self.is_final is other.is_final and self.tr == other.tr
# set attribute last child
@property
def last_child(self):
return self.tr[next(reversed(self.tr))]
@last_child.setter
def last_child(self, new_child):
self.tr[next(reversed(self.tr))] = new_child
class MinDict():
def __init__(self, words):
self.start = State()
self.register = list() # create a list to store registered states instead of set, because list can store the read-in sequence as well.
self.compile(words) # compile all functions to build the final minimal dictionary.
self.set_id() # assign each state an id
self.language = list() # init a list used to store the words acquired via traversal
# This methode is used to calculate the common prefix of two given words.
def common_prefix(self, word, pre_word):
common_prefix = ''
for i, c in enumerate(word):
if i < len(pre_word):
if c == pre_word[i]:
common_prefix += c
else:
return common_prefix
else:
return common_prefix
def split_state(self, common_prefix):
split_state = self.start
if common_prefix == '':
return split_state
else:
for c in common_prefix:
split_state = split_state.tr[c] # TODO: Update the split state. (Replace None with your own code)
return split_state
def has_children(self, state):
return state.tr
def register_or_replace(self, state):
child = state.last_child
if self.has_children(child):
self.register_or_replace(child)
for s in self.register:
if child == s:
state.last_child = s
del child
return
self.register.append(child)
def add_suffix(self, suffix, state):
for c in suffix:
next_state = State() #TODO: Create the next state by instantiating a State() class object. (Replace None with your code)
state.tr[c] = next_state
state = next_state
# pass #TODO: Set the last state as final state.
state.is_final = True
def compile(self, words):
pre_word = ''
for word in words:
common_prefix = self.common_prefix(word, pre_word)
split_state = self.split_state(common_prefix)
current_suffix = word[len(common_prefix):]
if self.has_children(split_state):
self.register_or_replace(split_state)
self.add_suffix(current_suffix, split_state)
pre_word = word
self.register_or_replace(self.start)
self.register.append(self.start)
def set_id(self):
for index, s in enumerate(self.register):
s.id = index
# Exercise 06: Traverse the automaton
# traverse the automaton to get the language
def get_language(self, state=None, transition=str()):
if state is None:
state = self.start
for c, next_state in state.tr.items():
new_transition = transition + c
if next_state.is_final:
self.language.append(new_transition) # yield
self.get_language(next_state, new_transition)
# all words are stored in self.language
# define a function with the help of graphviz to draw an automoton.
def draw_automaton(states, final_states, initial_state, delta):
g = graphviz.Digraph('MinDict')
for state in states:
if state in final_states:
g.attr('node',style='bold')
if state == initial_state:
g.node(str(state),labels = '->' + str(state))
else:
g.node(str(state))
g.attr('node',style='solid')
for x, label, z in delta:
g.edge(str(x),str(z),label=' '+label+' ')
return g
def main():
dictionary = ["acker", "alle", "alraune", "as", "aspekt"]
min_dict = MinDict(dictionary)
min_dict.get_language()
print(min_dict.language)
# retrieve the elements from min_dict to formalize a 5-tuple for an automoton.
# final_states = set()
# states = set()
# delta = set()
# start_state = str(min_dict.start.id)
# s_index = enumerate(min_dict.register)
# for state in min_dict.register:
# index = str(state.id)
# states.add(index)
# if state.is_final:
# final_states.add(index)
# for c in state.tr:
# out_state_index = str(state.tr[c].id)
# delta.add((index, c, out_state_index))
# # draw the minimal dictionary
# g = draw_automaton(states, final_states, start_state, delta)
# g.render('graphviz/aut_oop3.gv', view=True)
if __name__ == '__main__':
main()
['acker', 'alle', 'alraune', 'as', 'aspekt']