from itertools import islice, zip_longest
import re

from log import log, DEBUG
from rule import Rule
from symbols import Symbol, Symbols


class Edge(Rule):
    """A rule instance together with a position inside its right-hand side.

    An edge is created in one of two ways (see ``__init__``):

    * *predicted* from a rule (``predicted_by_rule``), starting at
      position 0 with length 0, or
    * *extended* from an existing edge (``extended_from``) by one symbol,
      which is filled either from a closed edge (``extended_using``) or
      from an input token (``tokens``).

    ``begin``, ``length`` and ``end`` are stored on the head symbol
    ``self.left`` and exposed through properties.
    """

    # Number of edges created so far; used to give each edge a serial id.
    counter = 0

    def __init__(self, predicted_from=None, predicted_by_rule=None,
                 extended_from=None, extended_using=None, tokens=None,
                 how_created=None):
        """Create a predicted or an extended edge.

        Args:
            predicted_from: edge whose current symbol triggered the
                prediction; when given, the new edge begins where that
                edge ends and inherits attributes from its current symbol.
            predicted_by_rule: rule this edge is predicted from. Takes
                precedence over ``extended_from``.
            extended_from: edge that is being advanced by one symbol.
            extended_using: closed edge consumed by the extension, or
                ``None`` when a token is consumed instead.
            tokens: token sequence, indexed by ``extended_from.end``.
            how_created: stored as-is on the instance.
        """
        Edge.counter += 1
        self.identificator = Edge.counter
        self.how_created = how_created
        self.predicted_from = predicted_from
        self.extended_from = extended_from

        if predicted_by_rule is not None:
            self.rule = predicted_by_rule
            # Own copy of the head so attribute changes stay local.
            self.left = Symbol(predicted_by_rule.left)
            self.begin = 0
            self.length = 0
            Symbols.__init__(self, predicted_by_rule, position=0)
            if predicted_from is not None:
                self.begin = predicted_from.end
                self.predict_attributes()
        elif extended_from is not None:
            self.extend_from(extended_from, extended_using, tokens)

        # An edge is closed once the position has passed its last symbol.
        self.closed = self.position == len(self)
        self.current_symbol = None if self.closed else self[self.position]

    def predict_attributes(self):
        """Seed attributes of a freshly predicted edge.

        Head attributes whose rule value is the placeholder ``True`` are
        taken from the current symbol of ``predicted_from``; attributes
        that the rule ties to the head (index ``-1``) are then pushed
        from the head to the tied right-hand-side symbols.
        """
        symbol = self.predicted_from.current_symbol
        for attr, value in self.rule.left.items():
            # ``True`` marks an attribute that is required but not yet
            # bound to a concrete value.
            if value == True and attr in symbol:
                self.left[attr] = symbol[attr]

        for attr, match_on_indexes in sorted(self.rule.attr_matches.items()):
            if -1 in match_on_indexes and attr in self.left:
                value = self.left[attr]
                # Skips the first entry; assumes the head index -1 comes
                # first, as ``copy_attributes`` also does.
                for index in match_on_indexes[1:]:
                    self.replace_symbol(index)
                    self[index][attr] = value

    def extend_from(self, extended_from, extended_using, tokens):
        """Initialise this edge as ``extended_from`` advanced by one symbol.

        The symbol at ``extended_from.position`` is replaced and then
        updated either from the head of ``extended_using`` (a closed
        edge) or, when that is ``None`` and ``tokens`` is non-empty,
        from the token at ``extended_from.end``. The edge length grows
        accordingly and tied attributes are propagated.
        """
        self.predicted_from = extended_from.predicted_from
        self.rule = extended_from.rule
        self.left = Symbol(extended_from.left)
        Symbols.__init__(self, extended_from,
                         position=extended_from.position + 1)
        self.replace_symbol(extended_from.position)
        updated_symbol = self[extended_from.position]

        if extended_using is not None:
            updated_symbol.update_from_nonterminal(extended_using.left)
            updated_symbol.edge = extended_using
            self.length += extended_using.length
        elif tokens:
            token = tokens[extended_from.end]
            updated_symbol.update(token)
            updated_symbol.edge = token
            updated_symbol.begin = extended_from.end
            updated_symbol.length = 1
            self.length += 1

        self.copy_attributes()

    def copy_attributes(self):
        """Propagate attributes of the symbol that was just filled in.

        For every attribute the rule ties across symbols, the value of
        the newly filled symbol is copied to the head (when the head is
        tied) and to the tied symbols to its right.
        """
        copy_from = self.extended_from.position
        for attr, indexes in self.rule.attr_matches.items():
            try:
                copy_from_index = indexes.index(copy_from)
            except ValueError:
                # This attribute is not tied to the filled symbol at all.
                continue

            value = self[copy_from].get(attr)
            if value is None:
                if log.isEnabledFor(DEBUG):
                    log.debug('no attr %s %s', attr, self[copy_from])
                continue  # nothing to copy
            elif value == True:
                if log.isEnabledFor(DEBUG):
                    log.debug('unset value %s', attr)
                # An unbound placeholder most likely should not be copied.
                continue

            if indexes[0] == -1:  # copy into the head
                if log.isEnabledFor(DEBUG):
                    log.debug('copy %s=%s → head', attr, value)
                self.left[attr] = value

            # Copy only to the members right of the one just filled in.
            for index in indexes[copy_from_index + 1:]:
                if log.isEnabledFor(DEBUG):
                    log.debug('copy %s=%s → %s', attr, value, self[index])
                self.replace_symbol(index)
                self[index][attr] = value

    def __eq__(self, other):
        """Compare two edges by rule identity, position, head and symbols.

        Symbols are compared up to and including the current position,
        together with the edges/tokens they were filled from. Returns
        ``NotImplemented`` for operands that are not edges so that
        comparing with e.g. ``None`` does not raise ``AttributeError``.
        """
        if not isinstance(other, Edge):
            return NotImplemented
        if self.rule is not other.rule or self.position != other.position:
            return False
        elif self.left != other.left:
            # NOTE: for symbols ``!=`` is not simply ``not __eq__``.
            return False

        compared = islice(zip_longest(self, other), self.position + 1)
        for symbol, other_symbol in compared:
            if symbol != other_symbol:
                return False
            # Recursion: edges may differ "under the hood", e.g.
            # CONSTITUENTS include CONSTITUENT, but that one may be
            # composed of all sorts of things.
            elif symbol.edge != other_symbol.edge:
                return False
        return True

    def __hash__(self):
        """Hash on rule identity, position, begin and length."""
        return (hash(id(self.rule)) ^ hash(self.position) ^
                hash(self.begin) ^ hash(self.length))

    def __repr__(self):
        return 'Edge(' + str(self) + ')'

    def extended_by(self, closed_edge):
        """Tell whether ``closed_edge`` can fill this edge's current symbol.

        Returns ``False`` when this edge is already closed (it has no
        current symbol), when the phrases differ, when the current
        symbol does not match the closed edge's head, or when a tied
        attribute would conflict.
        """
        if self.current_symbol is None:
            # A closed edge has nothing left to be extended.
            return False

        closed_left = closed_edge.left
        # terminal or mismatch
        if self.current_symbol.phrase != closed_left.phrase:
            return False
        # TODO: so there is no need to copy phrase when it is the same
        if not self.current_symbol.match_nonterminal(closed_left):
            return False
        return self.match_attributes(closed_left)

    def match_attributes(self, closed_left):
        """Check tied attributes of ``closed_left`` against earlier symbols.

        For each attribute tied by the rule, the value found on the
        already matched symbols (indexes below the position) must equal
        the value on ``closed_left`` when the current position is tied
        too. A missing attribute on either side is not a mismatch.

        Returns:
            ``False`` on the first conflicting value, ``True`` otherwise.
        """
        for attr, indexes in sorted(self.rule.attr_matches.items()):
            value = None
            for index in indexes:
                if index == -1:
                    continue  # the head is not checked here
                if index < self.position:
                    if attr not in self[index]:
                        if log.isEnabledFor(DEBUG):
                            log.debug(' %s not in %s', attr, self[index])
                        continue
                    value = self[index][attr]
                elif value is not None and index == self.position:
                    if attr not in closed_left:
                        if log.isEnabledFor(DEBUG):
                            log.debug('not present %s in %s', attr,
                                      closed_left)
                    elif value != closed_left[attr]:
                        log.error('mismatch %s %s != %s in %s and %s',
                                  attr, closed_left[attr], value,
                                  closed_left, self)
                        return False
        return True

    @property
    def begin(self):
        """Start offset of the edge, stored on the head symbol."""
        return self.left.begin

    @begin.setter
    def begin(self, begin):
        self.left.begin = begin

    @property
    def length(self):
        """Length of the edge, stored on the head symbol."""
        return self.left.length

    @length.setter
    def length(self, length):
        self.left.length = length

    @property
    def end(self):
        """End offset of the edge, as reported by the head symbol."""
        return self.left.end