#!/usr/bin/env python3 from collections import deque import re from attrs import Attrs from chart_parser import ChartParser from edge import Edge from log import log from rules import rules class TopDownChartParser(ChartParser): def prepare_agenda(self): self.agenda = deque(Edge(rule) for rule in self.rules if rule.left['phrase'] == self.root) for edge in self.agenda: log.debug("agenda %s", edge) def propose_edges(self, edge): for chart_edge in self.chart: if edge.complete and not chart_edge.complete: joined_edge = self._fundamental_rule(edge, chart_edge) if joined_edge: yield joined_edge, "fundamental %s" % joined_edge if chart_edge.complete and not edge.complete: joined_edge = self._use_closed_edges_from_chart(edge, chart_edge) if joined_edge: yield joined_edge, "closed %s" % joined_edge if edge.complete: return extended_edge = self._read_terminal(edge) if extended_edge: yield extended_edge, "terminal %s" % extended_edge for predicted_edge in self._predict(edge): if predicted_edge: yield predicted_edge, "predict %s" % predicted_edge @staticmethod def _fundamental_rule(edge, chart_edge): """ if E is in the form of [A → α •, j, k] then for each edge [B → γ • A β, i, j] in the chart create an edge [B → γ A • β, i, k] """ if (edge.begin == chart_edge.end and chart_edge.next_token.get('phrase') == edge.left['phrase'] # TODO: a co a jak porovnávám tady? and Attrs(edge.left).match(chart_edge)): log.debug('fundamental %s', edge) log.debug('+ %s', chart_edge) fundamental = Edge( chart_edge.rule, chart_edge.begin, edge.end, chart_edge.point + 1, chart_edge.left, chart_edge.right, chart_edge.real_offset + edge.end - edge.begin, parents=(edge, chart_edge)) fundamental.copy_attributes(edge.left) return fundamental @staticmethod def _use_closed_edges_from_chart(edge, chart_edge): """ if E is in the form of [B → γ • A β, i, j] then for each edge [A → α •, j, k] in the chart create an edge [B → γ A • β, i, k]. """ if (chart_edge.begin == edge.end and edge.next_token.get('phrase') == chart_edge.left['phrase'] and Attrs(chart_edge.left).match(edge)): log.debug('closed edge %s', edge) log.debug('+ %s', chart_edge) if chart_edge.right: log.critical('not just ε!') closed = Edge(edge.rule, edge.begin, chart_edge.end, edge.point + 1, edge.left, edge.right, edge.real_offset + chart_edge.end - chart_edge.begin, parents=(edge, chart_edge)) closed.right[edge.point].update(chart_edge.left) log.debug('closed added %s', Attrs(chart_edge.left)) return closed def _read_terminal(self, edge): """ if E is in the form of [A → α • aj+1 β, i, j] create an edge [A → α aj+1 • β, i, j+1]. """ if self._match_terminal(edge): new = Edge(edge.rule, edge.begin, edge.end + 1, edge.point + 1, edge.left, edge.right, edge.real_offset + 1, parents=(edge,)) new.right[edge.point].update( self.tokens[edge.begin + edge.real_offset]) new.copy_attributes() return new def _match_terminal(self, edge): position = edge.begin + edge.real_offset if position >= len(self.tokens): # raise Exception('WTF?') return token = self.tokens[position] for attr, value in edge.next_token.items(): try: # log.debug("atribut %s %s vs. %s", attr, token[attr], value) if value is not None and not re.match(value, token[attr]): return except KeyError: if attr in ('phrase', 'lemma', 'word', 'k'): return False # zalogovat, že není co porovnávat? asi se to (jinak) ignoruje log.debug("match %s = %s", Attrs(edge.next_token), Attrs(token)) return True def _predict(self, edge): """ if E is in the form of [A → α • B β, i, j] then for each grammar rule B → γ ∈ P, create an edge [B → • γ, j, j]. """ phrase = edge.left['phrase'] left_recursive = (edge.point == 0 and phrase == edge.right[0].get('phrase')) for rule in self.rules: if left_recursive and rule.left['phrase'] == phrase: continue # stačí to? asi jo, na levých stranách nebývá zatím nic… if rule.left['phrase'] == edge.next_token.get('phrase'): predicted = Edge(rule, edge.end, edge.end, parents=(edge,)) predicted.predict_attributes(edge.next_token) yield predicted if __name__ == '__main__': top_down = TopDownChartParser(rules) top_down.parse_from_vertical()