#!/nlp/projekty/metatrans2/public_html/python3/bin/python3
from collections import deque
import re
import sys
from attrs import Attrs
from chart_parser import ChartParser
from edge import Edge
from log import log
class TopDownChartParser(ChartParser):
    """Chart parser that drives the parse top-down, starting from the root.

    The attributes ``rules``, ``root``, ``chart``, ``tokens`` and ``agenda``
    are read or written here; apart from ``agenda`` they are presumably set
    up by ``ChartParser`` -- confirm against the base class.
    """

    def prepare_agenda(self):
        """Seed the agenda with one predicted edge per rule of the root."""
        self.agenda = deque(
            Edge(predicted_by_rule=rule)
            for rule in self.rules
            if rule.left['phrase'] == self.root
        )
        for edge in self.agenda:
            log.debug("agenda %s", edge)

    def propose_edges(self, edge):
        """Yield ``(new_edge, description)`` pairs derivable from *edge*.

        The edge is first combined with every edge in the chart; if it is
        still incomplete it is then extended over the next input token and
        used to predict new edges.
        """
        for chart_edge in self.chart:
            if edge.complete and not chart_edge.complete:
                joined_edge = self._fundamental_rule(edge, chart_edge)
                if joined_edge:
                    yield joined_edge, "fundamental %s" % joined_edge
            if chart_edge.complete and not edge.complete:
                joined_edge = self._use_closed_edges_from_chart(edge,
                                                                chart_edge)
                if joined_edge:
                    yield joined_edge, "closed %s" % joined_edge

        # A complete edge has no next token to read or predict from.
        if edge.complete:
            return

        extended_edge = self._read_terminal(edge)
        if extended_edge:
            yield extended_edge, "terminal %s" % extended_edge

        for predicted_edge in self._predict(edge):
            if predicted_edge:
                yield predicted_edge, "predict %s" % predicted_edge

    @staticmethod
    def _fundamental_rule(edge, chart_edge):
        """
        if E is in the form of [A → α •, j, k]
        then for each edge [B → γ • A β, i, j] in the chart
        create an edge [B → γ A • β, i, k]

        Returns the new edge, or None when the two edges do not combine.
        """
        if (edge.begin == chart_edge.end
                and chart_edge.next_token.get('phrase') == edge.left['phrase']
                # TODO: what exactly is being compared here, and how?
                and Attrs(edge.left).match(chart_edge)):
            log.debug('fundamental %s', edge)
            log.debug('+ %s', chart_edge)
            fundamental = Edge(extended_from=chart_edge, extended_using=edge)
            fundamental.copy_attributes(edge.left)
            return fundamental
        return None

    @staticmethod
    def _use_closed_edges_from_chart(edge, chart_edge):
        """
        if E is in the form of [B → γ • A β, i, j]
        then for each edge [A → α •, j, k] in the chart
        create an edge [B → γ A • β, i, k].

        Returns the new edge, or None when the two edges do not combine.
        """
        if (chart_edge.begin == edge.end and
                edge.next_token.get('phrase') == chart_edge.left['phrase'] and
                Attrs(chart_edge.left).match(edge)):
            log.debug('closed edge %s', edge)
            log.debug('+ %s', chart_edge)
            # A closed chart edge with a non-empty right-hand side is
            # reported loudly; processing continues regardless.
            if chart_edge.right:
                log.critical('not just ε!')
            closed = Edge(extended_from=edge, extended_using=chart_edge)
            closed.right[edge.point].update(chart_edge.left)
            log.debug('closed added %s', Attrs(chart_edge.left))
            return closed
        return None

    def _read_terminal(self, edge):
        """
        if E is in the form of [A → α • aj+1 β, i, j]
        create an edge [A → α aj+1 • β, i, j+1].

        Returns the extended edge, or None when the input is exhausted or
        the next token does not match.
        """
        position = edge.begin + edge.real_offset
        if position >= len(self.tokens):
            return None
        if self._match_terminal(edge, self.tokens[position]):
            new = Edge(extended_from=edge, tokens=self.tokens)
            new.copy_attributes()
            return new
        return None

    def _match_terminal(self, edge, token):
        """Return True if *token* satisfies every attribute the edge expects.

        Each non-None value in ``edge.next_token`` is treated as a regular
        expression that must match the whole token value. A token value
        that is not a string is iterated, and one fully matching item is
        enough.

        Raises:
            AssertionError: if the token carries the attribute with a None
                value while the edge expects a concrete value.
        """
        missing_attr = None
        for attr, expected_value in edge.next_token.items():
            if expected_value is None:
                continue
            if attr not in token:
                # TODO: maybe this should fail on every missing attribute
                if attr in ('phrase', 'lemma', 'word', 'k'):
                    return False
                # Beware: a "no attr" can be followed directly by a "match".
                missing_attr = attr
                continue
            actual = token[attr]
            if actual is None:
                raise AssertionError('Attribute %s is None in %s, not %s' % (
                    attr, token, expected_value))
            # fullmatch anchors the whole pattern; appending '$' anchored
            # only the last alternative of patterns such as 'a|b' and also
            # tolerated a trailing newline.
            try:
                matched = re.fullmatch(expected_value, actual) is not None
            except TypeError:
                # Not a string: accept if any contained value matches.
                matched = any(re.fullmatch(expected_value, value)
                              for value in actual)
                if not matched:
                    log.debug("mismatch %s=%s (expected: %s)", attr,
                              actual, expected_value)
            if not matched:
                return False

        if missing_attr:
            log.warning('noattr match %s = %s although %s is missing', token,
                        edge.next_token, missing_attr)
        else:
            log.debug("match %s = %s", token, edge.next_token)
        return True

    def _predict(self, edge):
        """
        if E is in the form of [A → α • B β, i, j]
        then for each grammar rule B → γ ∈ P, create an edge [B → • γ, j, j].

        Rules for the edge's own phrase are skipped when the edge is
        left-recursive and still at its start.
        """
        phrase = edge.left['phrase']
        left_recursive = (edge.point == 0 and
                          phrase == edge.right[0].get('phrase'))
        # The expected phrase does not depend on the rule being examined.
        wanted_phrase = edge.next_token.get('phrase')
        for rule in self.rules:
            rule_phrase = rule.left['phrase']
            if left_recursive and rule_phrase == phrase:
                continue
            # Is this enough? Probably yes: left-hand sides carry nothing
            # but the phrase so far.
            if rule_phrase == wanted_phrase:
                yield Edge(predicted_from=edge, predicted_by_rule=rule)
if __name__ == '__main__':
    # Script entry point: build the parser from the command-line arguments
    # and run it via parse_from_vertical().
    TopDownChartParser(argv=sys.argv).parse_from_vertical()