# Attribute List Extension for Python-Markdown
# ============================================

# Adds attribute list syntax. Inspired by
# [Maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
# feature of the same name.

# See https://Python-Markdown.github.io/extensions/attr_list
# for documentation.

# Original code Copyright 2011 [Waylan Limberg](http://achinghead.com/).

# All changes Copyright 2011-2014 The Python Markdown Project

# License: [BSD](https://opensource.org/licenses/bsd-license.php)

"""
 Adds attribute list syntax. Inspired by
[Maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
feature of the same name.

See the [documentation](https://Python-Markdown.github.io/extensions/attr_list)
for details.
"""

from __future__ import annotations
from typing import TYPE_CHECKING

from . import Extension
from ..treeprocessors import Treeprocessor
import re

if TYPE_CHECKING:  # pragma: no cover
    from xml.etree.ElementTree import Element


def _handle_double_quote(s, t):
    k, v = t.split('=', 1)
    return k, v.strip('"')


def _handle_single_quote(s, t):
    k, v = t.split('=', 1)
    return k, v.strip("'")


def _handle_key_value(s, t):
    return t.split('=', 1)


def _handle_word(s, t):
    if t.startswith('.'):
        return '.', t[1:]
    if t.startswith('#'):
        return 'id', t[1:]
    return t, t


_scanner = re.Scanner([
    (r'[^ =}]+=".*?"', _handle_double_quote),
    (r"[^ =}]+='.*?'", _handle_single_quote),
    (r'[^ =}]+=[^ =}]+', _handle_key_value),
    (r'[^ =}]+', _handle_word),
    (r' ', None)
])


def get_attrs_and_remainder(attrs_string: str) -> tuple[list[tuple[str, str]], str]:
    """ Parse attribute list and return a list of attribute tuples.

    Additionally, return any text that remained after a curly brace. In typical cases, its presence
    should mean that the input does not match the intended attribute list syntax.
    """
    attrs, remainder = _scanner.scan(attrs_string)
    # To keep historic behavior, discard all unparsable text prior to '}'.
    index = remainder.find('}')
    remainder = remainder[index:] if index != -1 else ''
    return attrs, remainder


def get_attrs(str: str) -> list[tuple[str, str]]:  # pragma: no cover
    """ Soft-deprecated. Prefer `get_attrs_and_remainder`. """
    return get_attrs_and_remainder(str)[0]


def isheader(elem: Element) -> bool:
    return elem.tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']


class AttrListTreeprocessor(Treeprocessor):

    BASE_RE = r'\{\:?[ ]*([^\}\n ][^\n]*)[ ]*\}'
    HEADER_RE = re.compile(r'[ ]+{}[ ]*$'.format(BASE_RE))
    BLOCK_RE = re.compile(r'\n[ ]*{}[ ]*$'.format(BASE_RE))
    INLINE_RE = re.compile(r'^{}'.format(BASE_RE))
    NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff'
                         r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d'
                         r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff'
                         r'\uf900-\ufdcf\ufdf0-\ufffd'
                         r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')

    def run(self, doc: Element) -> None:
        for elem in doc.iter():
            if self.md.is_block_level(elem.tag):
                # Block level: check for `attrs` on last line of text
                RE = self.BLOCK_RE
                if isheader(elem) or elem.tag in ['dt', 'td', 'th']:
                    # header, def-term, or table cell: check for attributes at end of element
                    RE = self.HEADER_RE
                if len(elem) and elem.tag == 'li':
                    # special case list items. children may include a `ul` or `ol`.
                    pos = None
                    # find the `ul` or `ol` position
                    for i, child in enumerate(elem):
                        if child.tag in ['ul', 'ol']:
                            pos = i
                            break
                    if pos is None and elem[-1].tail:
                        # use tail of last child. no `ul` or `ol`.
                        m = RE.search(elem[-1].tail)
                        if m:
                            if not self.assign_attrs(elem, m.group(1), strict=True):
                                elem[-1].tail = elem[-1].tail[:m.start()]
                    elif pos is not None and pos > 0 and elem[pos-1].tail:
                        # use tail of last child before `ul` or `ol`
                        m = RE.search(elem[pos-1].tail)
                        if m:
                            if not self.assign_attrs(elem, m.group(1), strict=True):
                                elem[pos-1].tail = elem[pos-1].tail[:m.start()]
                    elif elem.text:
                        # use text. `ul` is first child.
                        m = RE.search(elem.text)
                        if m:
                            if not self.assign_attrs(elem, m.group(1), strict=True):
                                elem.text = elem.text[:m.start()]
                elif len(elem) and elem[-1].tail:
                    # has children. Get from tail of last child
                    m = RE.search(elem[-1].tail)
                    if m:
                        if not self.assign_attrs(elem, m.group(1), strict=True):
                            elem[-1].tail = elem[-1].tail[:m.start()]
                            if isheader(elem):
                                # clean up trailing #s
                                elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
                elif elem.text:
                    # no children. Get from text.
                    m = RE.search(elem.text)
                    if m:
                        if not self.assign_attrs(elem, m.group(1), strict=True):
                            elem.text = elem.text[:m.start()]
                            if isheader(elem):
                                # clean up trailing #s
                                elem.text = elem.text.rstrip('#').rstrip()
            else:
                # inline: check for `attrs` at start of tail
                if elem.tail:
                    m = self.INLINE_RE.match(elem.tail)
                    if m:
                        remainder = self.assign_attrs(elem, m.group(1))
                        elem.tail = elem.tail[m.end():] + remainder

    def assign_attrs(self, elem: Element, attrs_string: str, *, strict: bool = False) -> str:
        """ Assign `attrs` to element.

        If the `attrs_string` has an extra closing curly brace, the remaining text is returned.

        The `strict` argument controls whether to still assign `attrs` if there is a remaining `}`.
        """
        attrs, remainder = get_attrs_and_remainder(attrs_string)
        if strict and remainder:
            return remainder

        for k, v in attrs:
            if k == '.':
                # add to class
                cls = elem.get('class')
                if cls:
                    elem.set('class', '{} {}'.format(cls, v))
                else:
                    elem.set('class', v)
            else:
                # assign attribute `k` with `v`
                elem.set(self.sanitize_name(k), v)
        # The text that we initially over-matched will be put back.
        return remainder

    def sanitize_name(self, name: str) -> str:
        """
        Sanitize name as 'an XML Name, minus the `:`.'
        See <https://www.w3.org/TR/REC-xml-names/#NT-NCName>.
        """
        return self.NAME_RE.sub('_', name)


class AttrListExtension(Extension):
    """ Attribute List extension for Python-Markdown """
    def extendMarkdown(self, md):
        md.treeprocessors.register(AttrListTreeprocessor(md), 'attr_list', 8)
        md.registerExtension(self)


def makeExtension(**kwargs):  # pragma: no cover
    return AttrListExtension(**kwargs)