#!/nlp/projekty/metatrans2/public_html/python3/bin/python3
"""Top-down chart parser built on top of ``ChartParser``."""

from collections import deque
import re
import sys

from attrs import Attrs
from chart_parser import ChartParser
from edge import Edge
from log import log


class TopDownChartParser(ChartParser):
    """Chart parser that predicts edges top-down, starting from the root."""

    def prepare_agenda(self):
        """Seed the agenda with one predicted edge per root rule.

        A rule qualifies when the phrase on its left-hand side equals
        ``self.root``.  Every seeded edge is logged at debug level.
        """
        self.agenda = deque(
            Edge(predicted_by_rule=rule)
            for rule in self.rules
            if rule.left['phrase'] == self.root
        )
        for edge in self.agenda:
            log.debug("agenda %s", edge)

    def propose_edges(self, edge):
        """Yield ``(new_edge, description)`` pairs derived from ``edge``.

        The edge is first combined with every edge in the chart (a complete
        edge with incomplete chart edges and vice versa).  An incomplete
        edge is then additionally extended by reading a terminal and used
        for prediction.
        """
        for chart_edge in self.chart:
            if edge.complete and not chart_edge.complete:
                joined_edge = self._fundamental_rule(edge, chart_edge)
                if joined_edge:
                    yield joined_edge, "fundamental %s" % joined_edge
            if chart_edge.complete and not edge.complete:
                joined_edge = self._use_closed_edges_from_chart(
                    edge, chart_edge)
                if joined_edge:
                    yield joined_edge, "closed %s" % joined_edge

        # A complete edge has nothing left to read or predict.
        if edge.complete:
            return

        extended_edge = self._read_terminal(edge)
        if extended_edge:
            yield extended_edge, "terminal %s" % extended_edge

        for predicted_edge in self._predict(edge):
            if predicted_edge:
                yield predicted_edge, "predict %s" % predicted_edge

    @staticmethod
    def _fundamental_rule(edge, chart_edge):
        """
        if E is in the form of [A → α •, j, k]
        then for each edge [B → γ • A β, i, j] in the chart
        create an edge [B → γ A • β, i, k]

        Returns the new edge, or None when the two edges do not combine.
        """
        combinable = (
            edge.begin == chart_edge.end
            and chart_edge.next_token.get('phrase') == edge.left['phrase']
            # TODO: what exactly is being compared here, and how?
            and Attrs(edge.left).match(chart_edge)
        )
        if not combinable:
            return None

        log.debug('fundamental %s', edge)
        log.debug('+ %s', chart_edge)
        fundamental = Edge(extended_from=chart_edge, extended_using=edge)
        fundamental.copy_attributes(edge.left)
        return fundamental

    @staticmethod
    def _use_closed_edges_from_chart(edge, chart_edge):
        """
        if E is in the form of [B → γ • A β, i, j]
        then for each edge [A → α •, j, k] in the chart
        create an edge [B → γ A • β, i, k].

        Returns the new edge, or None when the two edges do not combine.
        """
        combinable = (
            chart_edge.begin == edge.end
            and edge.next_token.get('phrase') == chart_edge.left['phrase']
            and Attrs(chart_edge.left).match(edge)
        )
        if not combinable:
            return None

        log.debug('closed edge %s', edge)
        log.debug('+ %s', chart_edge)
        if chart_edge.right:
            # Only reported; the edge is still built below.
            log.critical('not just ε!')
        closed = Edge(extended_from=edge, extended_using=chart_edge)
        closed.right[edge.point].update(chart_edge.left)
        log.debug('closed added %s', Attrs(chart_edge.left))
        return closed

    def _read_terminal(self, edge):
        """
        if E is in the form of [A → α • aj+1 β, i, j]
        create an edge [A → α aj+1 • β, i, j+1].

        Returns the extended edge, or None when the input is exhausted or
        the token at the edge's position does not match.
        """
        position = edge.begin + edge.real_offset
        if position >= len(self.tokens):
            return None
        if not self._match_terminal(edge, self.tokens[position]):
            return None

        new = Edge(extended_from=edge, tokens=self.tokens)
        new.copy_attributes()
        return new

    def _match_terminal(self, edge, token):
        """Return True if ``token`` satisfies ``edge.next_token``.

        Each attribute of ``edge.next_token`` with a non-None value is used
        as a regular expression that must match the whole corresponding
        token value.  When the token value is not a single string it is
        iterated and it suffices that one of its items matches.

        An attribute missing from the token fails the match only for
        'phrase', 'lemma', 'word' and 'k'; other missing attributes are
        skipped and reported with a warning if the match succeeds.

        Raises:
            AssertionError: the token holds None for an expected attribute.
        """
        no_attr = None
        for attr, expected_value in edge.next_token.items():
            if expected_value is None:
                continue

            if attr not in token:
                # TODO: maybe this should fail on every missing attribute
                if attr in ('phrase', 'lemma', 'word', 'k'):
                    return False
                # beware of "no attr" immediately followed by "match"
                no_attr = attr
                continue

            value = token[attr]
            if value is None:
                raise AssertionError('Attribute %s is None in %s, not %s' % (
                    attr, token, expected_value))

            # fullmatch anchors the entire pattern.  Appending '$' instead
            # would bind only to the last alternative of a pattern such as
            # 'a|b', letting any value that merely starts with 'a' through.
            try:
                matched = re.fullmatch(expected_value, value) is not None
            except TypeError:
                # Not a single string: a collection of alternative values.
                matched = any(re.fullmatch(expected_value, item)
                              for item in value)
                if not matched:
                    log.debug("mismatch %s=%s (expected: %s)", attr,
                              value, expected_value)
            if not matched:
                return False

        if no_attr:
            log.warning('noattr match %s = %s although %s is missing',
                        token, edge.next_token, no_attr)
        else:
            log.debug("match %s = %s", token, edge.next_token)
        return True

    def _predict(self, edge):
        """
        if E is in the form of [A → α • B β, i, j]
        then for each grammar rule B → γ ∈ P, create an edge [B → • γ, j, j].

        Rules for the edge's own left-hand phrase are skipped when the edge
        is still at its start and begins with that same phrase.
        """
        phrase = edge.left['phrase']
        left_recursive = (edge.point == 0
                          and phrase == edge.right[0].get('phrase'))
        expected_phrase = edge.next_token.get('phrase')
        for rule in self.rules:
            rule_phrase = rule.left['phrase']
            if left_recursive and rule_phrase == phrase:
                continue
            # Is this enough?  Probably yes, left-hand sides do not carry
            # anything else so far…
            if rule_phrase == expected_phrase:
                yield Edge(predicted_from=edge, predicted_by_rule=rule)


if __name__ == '__main__':
    top_down = TopDownChartParser(argv=sys.argv)
    top_down.parse_from_vertical()