from collections import defaultdict
from shlex import shlex

from attrs import Attrs
from log import log


class Rule:
    """
    Should be declared and kept immutable so that it can be hashed
    (this applies to its subclass, Edge, as well).
    """
    def __init__(self, left, right):
        self.left = left
        self.right = right
        self.epsilon_rules = list(self._process_optional_tokens())
        self.attr_matches = self._process_matches()

    def __str__(self):
        right = [str(Attrs(attrs)) for attrs in self.right]
        if 'point' in self.__dict__:
            right.insert(self.point, '•')
            if len(right) == 1:  # ε-rule with • now inserted
                right.insert(0, 'ε')  # or just right = list('ε•')
        elif not right:
            right = ['ε']
        span = ('{}–{} '.format(self.begin, self.end)
                if 'begin' in self.__dict__ else '')
        return span + str(Attrs(self.left)) + ' → ' + ' '.join(right)

    def _process_matches(self):
        """
        Collect, for every attribute given without a value (None), the
        indexes of the rule tokens it has to match across; the left side
        gets index -1, the right-side tokens are numbered from 0.
        """
        attr_matches = defaultdict(list)  # → OrderedDict, so that I don't have to sort
        for index, token in enumerate([self.left] + list(self.right), -1):
            for attr, value in token.items():
                if value is None:
                    attr_matches[attr].append(index)
        for attr, match_on_indexes in attr_matches.items():
            if len(match_on_indexes) == 1:
                raise ValueError('No other rule token to match "%s" on' % attr)
        return attr_matches

    @classmethod
    def from_string(cls, rule):
        left, right = rule.split(' → ')
        left = next(cls.read_rule(left))
        right = list(cls.read_rule(right))
        return cls(left, right)

    @staticmethod
    def tokenize_rule(rule):
        parser = shlex(rule)  # can’t handle Czech without parentheses
        while True:
            token = parser.get_token()
            if not token:
                yield None  # None as another signal of termination
                break
            yield token

    @staticmethod
    def read_rule(rule):
        """
        Yield symbols, i.e. (non-)terminals as attribute dicts, from a
        string which has already been split into its left/right side.
        """
        attrs = None  # not None → inside a token (= the whole "attribute",
                      # i.e. the group inside parentheses)
        attr = ''
        equal_sign = False
        for token in Rule.tokenize_rule(rule):
            if token == '(':
                if attrs is None:
                    attrs = {}
                    continue
                elif 'phrase' not in attrs or attrs['phrase']:
                    # there was no „?“; even if there was, keep the code-flow shared
                    yield attrs
                    attrs = {}
                    continue
                else:  # under exceptional circumstances, e.g. (a=( b=1)?
                    raise ValueError('Unexpected "("')
            elif token is None:
                if attrs is not None:
                    yield attrs
                    attrs = None
                break
            elif attrs is None:
                raise ValueError('Outside token: ' + token)
            elif token == ')':
                if equal_sign:
                    raise ValueError('Unterminated attribute ' + attr)
                elif attr:
                    attrs[attr] = None
                if 'phrase' in attrs and attrs['phrase'] is None:
                    del attrs['phrase']  # could be a „?“
                attr = ''
                equal_sign = False
                continue
            elif token == '?':
                attrs['optional'] = True
                continue
                # yield attrs
                # attrs = None
            if not attr and token == '=':
                raise ValueError('Unexpected "="')
            if not attr and 'phrase' not in attrs:
                # the first word of a token: an upper-case phrase name,
                # or a lower-case attribute of a terminal
                if token.islower():  # c, tag, word…
                    attrs['phrase'] = None  # not the phrase head name
                    attr = token
                else:
                    attrs['phrase'] = token
                continue
            if not attr:
                attr = token
            elif token == '=':
                equal_sign = True
            elif equal_sign:
                quote = token[0] if token[0] in '"\'' else None
                if quote and token[-1] != quote:
                    raise ValueError('Not properly quoted: ' + attr)
                if quote:
                    token = token[1:-1]
                attrs[attr] = token
                attr = ''
                equal_sign = False
            else:
                # a bare attribute (no "=value") marks a match slot
                attrs[attr] = None
                attr = token
        if attrs is not None:
            raise ValueError('Rule not terminated by ")"')

    def _process_optional_tokens(self):
        """
        For every right-side token marked „?“, yield the helper rules that
        make it optional: an optional terminal is first replaced in place
        by a fresh non-terminal (left phrase name + position), together
        with a unit rule expanding it back to the terminal; in both cases
        an ε-rule for the token’s phrase is yielded.
        """
        for index, token in enumerate(self.right, 1):
            if token.get('optional', False):
                if 'phrase' not in token:
                    new_token = dict(token)
                    del new_token['optional']
                    # token['word'] = None
                    token.pop('word', None)
                    token.pop('lemma', None)
                    token.pop('tag', None)
                    token['phrase'] = self.left['phrase'] + str(index)
                    new_head = dict(token)
                    del new_head['optional']
                    yield Rule(new_head, [new_token])
                yield Rule(dict(phrase=token['phrase']), [])
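

# A minimal usage sketch (an addition, not part of the original module). It
# illustrates the rule notation that read_rule() above appears to accept: an
# upper-case name opens a phrase, lower-case names are attributes, a bare
# attribute marks a slot that must match across tokens, and a trailing „?“
# makes a token optional. The concrete rule below is invented for
# illustration, and running it assumes the sibling attrs module (Attrs) and
# log module are importable.
if __name__ == '__main__':
    rule = Rule.from_string('(NP c) → (N c) (A c="1")?')
    print(rule)  # exact rendering depends on the local Attrs helper
    for epsilon_rule in rule.epsilon_rules:
        print(epsilon_rule)  # the ε-rule generated for the optional (A …)? token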