from collections import defaultdict

from symbols import Symbol, Symbols, escape


class Rule(Symbols):
    """A grammar rule: one left-hand ``Symbol`` and a right-hand side.

    The right-hand side is handled by the ``Symbols`` base class (the rule is
    iterated to obtain its right-hand symbols); the left-hand side is kept in
    ``self.left``.

    Attributes set by ``__init__``:
        left: the left-hand ``Symbol``.
        epsilon_rules: a *generator* of empty rules, one per optional
            right-hand symbol.  Being a generator it is evaluated lazily and
            can be consumed only once.
        attr_matches: mapping ``attribute -> [indexes]`` of the symbols that
            carry that attribute with a true value (``-1`` is the left-hand
            symbol, ``0..`` are right-hand positions).
    """

    def __init__(self, rule=None, left=None, right=None):
        """Build a rule from a string, from another rule, or from parts.

        Args:
            rule: ``None``, another ``Rule`` to copy, or a string of the form
                ``'left → right'``.
            left: when ``rule`` is ``None``, the source of the left-hand
                symbol.  Otherwise, if truthy, it is merged into the
                left-hand symbol taken from ``rule`` via ``update``.
            right: right-hand side passed on to ``Symbols.__init__``.  When
                ``None`` and ``rule`` is given, the right-hand side comes from
                ``rule`` instead.

        Raises:
            ValueError: if a string ``rule`` does not split into exactly two
                parts on ``' → '``, or if an attribute marked for matching
                occurs on a single symbol only (see ``_process_matches``).
        """
        if rule is None:
            self.left = Symbol(left)
        else:
            if isinstance(rule, Rule):
                self.left = Symbol(rule.left)
                if right is None:
                    # Copy the symbols so the new rule does not share them.
                    right = (Symbol(s) for s in rule)
            else:
                try:
                    left_side, right_side = rule.split(' → ')
                except ValueError as err:
                    raise ValueError(
                        'Multiple rules stuck together? ' + rule) from err
                self.left = Symbol(left_side)
                if right is None:
                    right = right_side
            if left:
                self.left.update(Symbol(left))
        Symbols.__init__(self, right)
        # NOTE(review): this stores a one-shot generator, not a list.
        self.epsilon_rules = self._process_optional_symbols()
        self.attr_matches = self._process_matches()

    def __add__(self, value):
        """Return a new rule parsed from the two string forms concatenated."""
        return Rule(str(self) + str(value))

    def __eq__(self, other):
        """Rules are equal when their string forms are equal."""
        return str(self) == str(other)

    def __hash__(self):
        """Hash of the string form, consistent with ``__eq__``."""
        return hash(str(self))

    def __format__(self, format_spec=''):
        """Format as ``'<left> → <right>'`` using ``format_spec`` for both."""
        # exclude empty spans
        formatted_left = format(self.left, format_spec + 's')
        formatted_right = Symbols.__format__(self, format_spec)
        return formatted_left + ' → ' + formatted_right

    def __str__(self):
        """Return the default-formatted rule, cached in ``self.stringified``."""
        # ``stringified`` is not assigned in this class before being read;
        # presumably ``Symbols.__init__`` initialises it -- TODO confirm.
        if self.stringified is None:
            self.stringified = format(self)
        return self.stringified

    def __repr__(self):
        """Return ``Rule(<escaped rule>)``."""
        return 'Rule(' + escape(self) + ')'

    # TODO: it was a good lesson, but it would be better to go back to the
    # __nonzero__ that Python defined for Rule as "is not None"; right now it
    # separates rules from ε-rules, which I do not need at all.

    def _process_matches(self):
        """Collect, per attribute, the indexes of symbols flagged for matching.

        Index ``-1`` stands for the left-hand symbol, ``0`` and up for the
        right-hand symbols.

        Returns:
            A ``defaultdict(list)`` mapping attribute name to indexes.

        Raises:
            ValueError: if an attribute is flagged on exactly one symbol, so
                there is nothing to match it against.
        """
        attr_matches = defaultdict(list)  # → OrderedDict, to avoid sorting
        for index, symbol in enumerate([self.left] + list(self), -1):
            for attr, value in symbol.items():
                # Equality (not identity) is kept to preserve the existing
                # behaviour for values such as 1.
                # NOTE(review): confirm whether ``is True`` is what is meant.
                if value == True:  # noqa: E712
                    attr_matches[attr].append(index)
        for attr, match_on_indexes in attr_matches.items():
            if len(match_on_indexes) == 1:
                raise ValueError('No other symbol to match "%s" on in %s'
                                 % (attr, self))
        return attr_matches

    def _process_optional_symbols(self):
        """Yield an empty rule for each right-hand symbol marked optional."""
        for symbol in self:
            if symbol.optional:
                yield Rule(left=Symbol(phrase=symbol.phrase), right=[])

    def insert_symbols(self, inserted_symbols, before=False, inside=False,
                       after=False, position=None, update_left=None):
        """Return a new rule with ``inserted_symbols`` added to the right side.

        Args:
            inserted_symbols: the symbols to insert; a string is converted
                with ``Symbols(...)``.
            before: put a copy in front of the right-hand side.
            inside: put a copy between every two neighbouring symbols.
            after: put a copy behind the right-hand side.
            position: if not ``None``, insert one copy at this index and
                ignore ``before``/``inside``/``after``.  Negative and
                out-of-range values are resolved once, the way
                ``list.insert`` resolves them, so the inserted symbols stay
                together and in order.
            update_left: optional symbol (or string converted with
                ``Symbol(...)``) merged into a copy of the left-hand symbol.

        Returns:
            A new ``Rule``; ``self`` is not modified.
        """
        if isinstance(inserted_symbols, str):
            inserted_symbols = Symbols(inserted_symbols)
        left = Symbol(self.left)
        if isinstance(update_left, str):  # TODO: or rather just replace_left?
            update_left = Symbol(update_left)
        if update_left:
            left.update(update_left)

        if position is not None:
            new_right = Symbols(self)
            # Resolve the index once.  Re-resolving a negative or too-large
            # index on every insert() would scramble the inserted symbols.
            length = len(new_right)
            if position < 0:
                index = max(length + position, 0)
            else:
                index = min(position, length)
            # Inserting in reverse at a fixed index keeps the original order.
            for symbol in reversed(inserted_symbols):
                new_right.insert(index, symbol)
        else:
            new_right = Symbols(inserted_symbols if before else [])
            if inside:
                interleaved = False
                for symbol in self:
                    new_right.extend([symbol] + inserted_symbols)
                    interleaved = True
                # Drop the copy that trails the last symbol unless ``after``
                # asks for it.  Skip this for an empty right-hand side, where
                # nothing was appended and popping would either raise
                # IndexError or eat the ``before`` copy.
                if interleaved and not after:
                    for _ in range(len(inserted_symbols)):
                        new_right.pop()
            else:
                new_right.extend(self)
                if after:
                    new_right.extend(inserted_symbols)
        return Rule(left=left, right=new_right)

    def remove_attributes(self, removed_attributes, except_position=None):
        """Return a new rule with attributes removed from right-hand symbols.

        Args:
            removed_attributes: iterable of attribute names to remove; a
                string is converted with ``Symbol(...)`` and then iterated.
            except_position: index of a right-hand symbol that is kept
                untouched, or ``None`` to process every symbol.

        Returns:
            A new ``Rule``.  The left-hand symbol is passed through without
            removing anything from it.
        """
        if isinstance(removed_attributes, str):
            removed_attributes = Symbol(removed_attributes)
        new_right = Symbols()
        for index, symbol in enumerate(self):
            if index != except_position:
                # Work on a copy so the symbols of this rule stay intact.
                symbol = Symbol(symbol)
                for attr in removed_attributes:
                    symbol.pop(attr, None)
            new_right.append(symbol)
        return Rule(left=self.left, right=new_right)