from collections import deque import itertools import sys from log import log from rule import Rule class ChartParser: def __init__(self, rules): self.import_rules(rules) self.print_rules() def parse(self, tokens, root='S'): self.tokens = tokens self.chart = [] # jak by to mohla bejt množina, když pro Edge neumím # udělat __hash__? navíc jsou její podstatné části, # dokonce už left, slovníky self.root = root self.prepare_agenda() while self.agenda: edge = self.agenda.popleft() # it might be tempting to add the edge to the chart right away # to simplify edge_is_duplicated(); but keep in mind that the edge # would “want to” combine with itself in propose_edges() log.info(' %s', edge) for new_edge, message in self.propose_edges(edge): if not self.edge_is_duplicated(new_edge, edge): log.info(message) self.agenda.append(new_edge) else: log.warning(message) self.chart.append(edge) self.print_chart() self.check() def edge_is_duplicated(self, edge, agenda_edge): return edge == agenda_edge or edge in self.agenda or edge in self.chart def print_chart(self): for edge in self.chart: # vypisuju i částečně uplatněné hrany a dokonce hrany s ε-pravidly if edge.point == 0 and edge.rule.right: continue log.info('chart %s', edge) def print_rules(self): for rule in self.rules: log.debug('rule %s', rule) log.info('\n') @staticmethod def extract_attributes_from_tag(tag): if len(tag) % 2 != 0: raise Exception('Malformed tag "%s"', tag) return ((attr, val) for (attr, val) in grouper(tag, 2)) @staticmethod def add_parsed_attibutes_to_tokens(tokens): for token in tokens: if 'tag' in token: token.update( ChartParser.extract_attributes_from_tag(token['tag'])) return tokens def parse_from_vertical(self, lines=sys.stdin): tokens = [] for line in lines: try: word, lemma, tag = line.split() except: continue token = dict(word=word, lemma=lemma, tag=tag) tokens.append(token) self.add_parsed_attibutes_to_tokens(tokens) self.parse(tokens) def import_rules(self, rules): self.rules = [] for rule in rules: rule = Rule.from_string(rule) self.rules.append(rule) for epsilon_rule in rule.epsilon_rules: self.rules.append(epsilon_rule) # už ne check, ale snaha o vypsání rodičovských hran def check(self): passed = None log.info('\n') for edge in self.chart: if (edge.begin == 0 and edge.end == len(self.tokens) and edge.complete): passed = edge log.info('Passed! %s\n', edge) # self.print_graph(edge) # to není hotové if not passed: log.error('No edge covering the whole input!') log.info('\n') # děsně to duplikuje def print_graph(self, edge): queue = deque([edge]) while queue: edge = queue.popleft() print(edge) # if isinstance(parent_edges, tuple): try: left, right = edge.parents queue.append(left) queue.append(right) continue except ValueError: pass try: parent, = edge.parents queue.append(parent) except ValueError: pass # copied from oVirt/VDSM, which in turn used # http://docs.python.org/2.6/library/itertools.html?highlight=grouper#recipes def grouper(iterable, n, fillvalue=None): "Collect data into fixed-length chunks or blocks" # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx args = [iter(iterable)] * n return itertools.zip_longest(fillvalue=fillvalue, *args)