from collections import defaultdict
from itertools import zip_longest
import re

from shlex import shlex


# Attribute names in positional order: the two word-level attributes
# followed by one single-character name per tag attribute.
ATTRS = ['word', 'lemma']
ATTRS.extend('kegncpamdxytzw~')
# Attribute name -> its position in ATTRS.
ATTR_MAP = dict(zip(ATTRS, range(len(ATTRS))))
# All valid positions in ATTRS, in ascending order.
ATTR_IDS = [attr_id for attr_id, _ in enumerate(ATTRS)]


class XmlTag:
    """A single XML-like tag with an ordered list of attributes.

    Attributes are stored as a list of ``(name, value)`` pairs, so their
    order and any duplicates are preserved.  A value of ``None`` marks an
    attribute that was written without a value (``<tag attr>``).

    ``closing`` and ``empty`` default to ``None``, meaning "not determined
    yet"; ``parse()`` relies on ``closing is None`` to recognise the first
    token.
    """

    def __init__(self, lines=None, name=None, closing=None, empty=None,
                 attributes=None, indentation_level=0):
        self.lines = lines
        self.name = name
        self.closing = closing
        self.empty = empty
        # A non-empty list passed in is used as-is (not copied).
        self.attributes = attributes or []
        self.indentation_level = indentation_level

    @property
    def opening(self):
        """True unless the tag is a closing one (``closing`` is truthy)."""
        return not self.closing

    def append(self, attr, value):
        """Add an attribute at the end, even if ``attr`` is already present."""
        self.attributes.append((attr, value))

    def __contains__(self, key):
        """Tell whether any attribute is named ``key``."""
        return key in (attr for attr, _ in self.attributes)

    def clear(self):
        """Remove all attributes in place."""
        self.attributes.clear()

    def __getitem__(self, key):
        """Return the value of the last attribute named ``key``.

        Returns ``None`` (rather than raising) when there is no such
        attribute.
        """
        for attr, value in reversed(self.attributes):
            if key == attr:
                return value
        return None

    def __setitem__(self, attr, value):
        """Set the value of the last occurrence of attr, or append a new one."""
        for index in range(len(self.attributes) - 1, -1, -1):
            if attr == self.attributes[index][0]:
                self.attributes[index] = (attr, value)
                return
        self.attributes.append((attr, value))

    def __iter__(self):
        """Iterate over the ``(name, value)`` attribute pairs."""
        return iter(self.attributes)

    def __str__(self):
        """Render the tag as text, indented by two spaces per level.

        An attribute whose value is ``None`` is written as a bare name, so
        a tag parsed from ``<doc flag>`` is rendered as ``<doc flag>`` and
        not as ``<doc flag="None">``.
        """
        opening = '' if self.opening else '/'
        empty = '/' if self.empty else ''
        rendered = []
        for attr, val in self.attributes:
            if val is None:
                rendered.append(' {}'.format(attr))
            else:
                rendered.append(' {}="{}"'.format(attr, val))
        return '{indent}<{opening}{name}{attrs}{empty}>'.format(
            opening=opening, name=self.name, attrs=''.join(rendered),
            empty=empty, indent='  '*self.indentation_level)

    def __repr__(self):
        """Return ``XmlTag(...)`` wrapping the (quoted) rendered tag."""
        text = str(self)
        try:
            quoted = escape(text)
        except ValueError:
            # escape() refuses text containing both quote characters;
            # repr() must not raise, so fall back to Python's own quoting.
            quoted = repr(text)
        return 'XmlTag(' + quoted + ')'

    @classmethod
    def parse(cls, lines):
        """Build a tag from its textual form, e.g. ``<doc id="1">``.

        The first character of ``lines`` is skipped without being checked
        and the rest is tokenized with ``shlex``.  Parsing stops at the
        first ``>``, at the end of input, or at an unexpected token;
        whatever was recognised up to that point is returned.  Quoted
        values have their surrounding quotes removed.  An attribute that is
        not followed by ``=`` is stored with the value ``None``.

        The ``lines`` attribute of the returned tag is left at its default.

        Raises:
            ValueError: from ``shlex`` when a quotation is not closed.
        """
        # TODO: rather call get_token() once and check that it is '<'
        parser = shlex(lines[1:])
        tag = cls()
        # State of the attribute being read:
        #   attr is None                  -> expecting an attribute name
        #   attr set and value == '='     -> name read, expecting '='
        #   attr set and value is None    -> '=' read, expecting the value
        attr = None
        value = None
        while True:
            token = parser.get_token()  # ValueError("No closing quotation")
            if not token:
                break  # end of input
            elif tag.closing is None:
                # Very first token: either '/' (closing tag) or the name.
                tag.closing = token == '/'
                if tag.opening:
                    tag.name = token
            elif tag.name is None:
                # Only reached for closing tags: the name follows the '/'.
                if token == '>':
                    break
                tag.name = token
            elif attr is None:
                if token == '/':
                    tag.empty = True
                elif token == '>':
                    break
                elif token == '=':
                    break  # unexpected =
                else:
                    attr = token
                    value = '='  # expecting =
            elif tag.empty:
                # A pending attribute was already stored before '/' was
                # seen; both '>' and anything else end the parsing here.
                if token == '>':
                    break
                else:
                    break  # unexpected /
            elif value == '=':
                if token == '=':
                    value = None  # expecting a value
                elif token == '/':
                    tag.append(attr, None)
                    tag.empty = True
                elif token == '>':
                    tag.append(attr, None)
                    break
                else:
                    tag.append(attr, None)  # got an empty value
                    attr = token  # immediately followed by another
            elif value is None:
                # Strip one pair of matching quotes around the value.
                if (len(token) > 1 and token[0] in '\'"' and
                        token[0] == token[-1]):
                    token = token[1:-1]
                tag.append(attr, token)
                attr = None  # expecting an attribute
                value = None
        return tag


def extract_attributes_from_tag(tag):
    """Split a tag into its two-character attribute/value pairs.

    A comma-separated tag with no empty part is treated as a set of
    alternatives and handed over to ``put_ambiguous_attrs_aside``, whose
    result is returned unchanged.  Any other tag is cut into consecutive
    pairs of characters and returned together with an empty list.

    Raises:
        ValueError: when a tag that is cut into pairs has an odd length.
    """
    # TODO: also return tag in .ambiguous so matching on tag works
    # MAYBE: support k1c14 (multiple-valued attributes in a short form)
    if ',' in tag:
        alternatives = tag.split(',')
        if all(alternatives):
            return put_ambiguous_attrs_aside(alternatives)
    if len(tag) % 2:
        raise ValueError('Malformed tag "{}"'.format(tag))
    pairs = list(grouper(tag, 2))
    return pairs, []


def put_ambiguous_attrs_aside(tags):
    """Separate attributes shared by all given tags from those that differ.

    Every tag is cut into two-character attribute/value pairs and extended
    with a ``('tag', tag)`` pair.  Attributes for which only one distinct
    value was seen go into the first returned dict; for each tag, its
    remaining attributes form one dict in the second returned list (tags
    with nothing remaining are left out).

    Raises:
        ValueError: when any of the tags has an odd length.
    """
    # MAYBE: support ambiguous tags of uneven length: c1,c2d1
    seen_values = defaultdict(set)
    attrs_per_tag = []

    for tag in tags:
        if len(tag) % 2:
            raise ValueError('Malformed tag "{}"'.format(tag))
        pairs = list(grouper(tag, 2))
        pairs.append(('tag', tag))
        for name, val in pairs:
            seen_values[name].add(val)
        attrs_per_tag.append(pairs)

    unambiguous = {}
    for name, vals in seen_values.items():
        if len(vals) == 1:
            unambiguous[name] = vals.pop()

    ambiguous = []
    for pairs in attrs_per_tag:
        differing = {}
        for name, val in pairs:
            if name not in unambiguous:
                differing[name] = val
        if differing:
            ambiguous.append(differing)
    return unambiguous, ambiguous


# copied from oVirt/VDSM, which in turn used
# http://docs.python.org/2.6/library/itertools.html?highlight=grouper#recipes
def grouper(iterable, n, fillvalue=None):
    """Yield consecutive n-tuples of items, padding the last with fillvalue.

    grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx
    """
    # The same iterator is passed n times, so each tuple consumes
    # n consecutive items from it.
    shared = iter(iterable)
    return zip_longest(*([shared] * n), fillvalue=fillvalue)


# Characters that make quoting necessary; modeled on shlex._find_unsafe.
_NEED_ESCAPING_PATTERN = re.compile(r'[()=? \t\n]')
need_escaping = _NEED_ESCAPING_PATTERN.search


def escape(s):  # the beginning is modeled on shlex.quote
    """Return ``s`` as text, wrapped in quotes when it needs them.

    Non-string arguments are converted with ``str()`` first.  An empty
    string becomes ``''``; text for which ``need_escaping`` finds nothing
    is returned unchanged; otherwise the text is wrapped in single quotes,
    or in double quotes when it already contains a single quote.

    Raises:
        ValueError: when the text needs quoting but contains both kinds
            of quotes.
    """
    text = s if isinstance(s, str) else str(s)
    if not text:
        return "''"
    if need_escaping(text) is None:
        return text
    # Prefer single quotes; use whichever quote does not occur in the text.
    for quote in ("'", '"'):
        if quote not in text:
            return quote + text + quote
    raise ValueError(
        "Can't escape, the string includes both quotes: " + text)