# Attribute List Extension for Python-Markdown # ============================================ # Adds attribute list syntax. Inspired by # [Maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s # feature of the same name. # See https://Python-Markdown.github.io/extensions/attr_list # for documentation. # Original code Copyright 2011 [Waylan Limberg](http://achinghead.com/). # All changes Copyright 2011-2014 The Python Markdown Project # License: [BSD](https://opensource.org/licenses/bsd-license.php) """ Adds attribute list syntax. Inspired by [Maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s feature of the same name. See the [documentation](https://Python-Markdown.github.io/extensions/attr_list) for details. """ from __future__ import annotations from typing import TYPE_CHECKING from . import Extension from ..treeprocessors import Treeprocessor import re if TYPE_CHECKING: # pragma: no cover from xml.etree.ElementTree import Element def _handle_double_quote(s, t): k, v = t.split('=', 1) return k, v.strip('"') def _handle_single_quote(s, t): k, v = t.split('=', 1) return k, v.strip("'") def _handle_key_value(s, t): return t.split('=', 1) def _handle_word(s, t): if t.startswith('.'): return '.', t[1:] if t.startswith('#'): return 'id', t[1:] return t, t _scanner = re.Scanner([ (r'[^ =}]+=".*?"', _handle_double_quote), (r"[^ =}]+='.*?'", _handle_single_quote), (r'[^ =}]+=[^ =}]+', _handle_key_value), (r'[^ =}]+', _handle_word), (r' ', None) ]) def get_attrs_and_remainder(attrs_string: str) -> tuple[list[tuple[str, str]], str]: """ Parse attribute list and return a list of attribute tuples. Additionally, return any text that remained after a curly brace. In typical cases, its presence should mean that the input does not match the intended attribute list syntax. """ attrs, remainder = _scanner.scan(attrs_string) # To keep historic behavior, discard all unparsable text prior to '}'. index = remainder.find('}') remainder = remainder[index:] if index != -1 else '' return attrs, remainder def get_attrs(str: str) -> list[tuple[str, str]]: # pragma: no cover """ Soft-deprecated. Prefer `get_attrs_and_remainder`. """ return get_attrs_and_remainder(str)[0] def isheader(elem: Element) -> bool: return elem.tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] class AttrListTreeprocessor(Treeprocessor): BASE_RE = r'\{\:?[ ]*([^\}\n ][^\n]*)[ ]*\}' HEADER_RE = re.compile(r'[ ]+{}[ ]*$'.format(BASE_RE)) BLOCK_RE = re.compile(r'\n[ ]*{}[ ]*$'.format(BASE_RE)) INLINE_RE = re.compile(r'^{}'.format(BASE_RE)) NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff' r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d' r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff' r'\uf900-\ufdcf\ufdf0-\ufffd' r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+') def run(self, doc: Element) -> None: for elem in doc.iter(): if self.md.is_block_level(elem.tag): # Block level: check for `attrs` on last line of text RE = self.BLOCK_RE if isheader(elem) or elem.tag in ['dt', 'td', 'th']: # header, def-term, or table cell: check for attributes at end of element RE = self.HEADER_RE if len(elem) and elem.tag == 'li': # special case list items. children may include a `ul` or `ol`. pos = None # find the `ul` or `ol` position for i, child in enumerate(elem): if child.tag in ['ul', 'ol']: pos = i break if pos is None and elem[-1].tail: # use tail of last child. no `ul` or `ol`. m = RE.search(elem[-1].tail) if m: if not self.assign_attrs(elem, m.group(1), strict=True): elem[-1].tail = elem[-1].tail[:m.start()] elif pos is not None and pos > 0 and elem[pos-1].tail: # use tail of last child before `ul` or `ol` m = RE.search(elem[pos-1].tail) if m: if not self.assign_attrs(elem, m.group(1), strict=True): elem[pos-1].tail = elem[pos-1].tail[:m.start()] elif elem.text: # use text. `ul` is first child. m = RE.search(elem.text) if m: if not self.assign_attrs(elem, m.group(1), strict=True): elem.text = elem.text[:m.start()] elif len(elem) and elem[-1].tail: # has children. Get from tail of last child m = RE.search(elem[-1].tail) if m: if not self.assign_attrs(elem, m.group(1), strict=True): elem[-1].tail = elem[-1].tail[:m.start()] if isheader(elem): # clean up trailing #s elem[-1].tail = elem[-1].tail.rstrip('#').rstrip() elif elem.text: # no children. Get from text. m = RE.search(elem.text) if m: if not self.assign_attrs(elem, m.group(1), strict=True): elem.text = elem.text[:m.start()] if isheader(elem): # clean up trailing #s elem.text = elem.text.rstrip('#').rstrip() else: # inline: check for `attrs` at start of tail if elem.tail: m = self.INLINE_RE.match(elem.tail) if m: remainder = self.assign_attrs(elem, m.group(1)) elem.tail = elem.tail[m.end():] + remainder def assign_attrs(self, elem: Element, attrs_string: str, *, strict: bool = False) -> str: """ Assign `attrs` to element. If the `attrs_string` has an extra closing curly brace, the remaining text is returned. The `strict` argument controls whether to still assign `attrs` if there is a remaining `}`. """ attrs, remainder = get_attrs_and_remainder(attrs_string) if strict and remainder: return remainder for k, v in attrs: if k == '.': # add to class cls = elem.get('class') if cls: elem.set('class', '{} {}'.format(cls, v)) else: elem.set('class', v) else: # assign attribute `k` with `v` elem.set(self.sanitize_name(k), v) # The text that we initially over-matched will be put back. return remainder def sanitize_name(self, name: str) -> str: """ Sanitize name as 'an XML Name, minus the `:`.' See . """ return self.NAME_RE.sub('_', name) class AttrListExtension(Extension): """ Attribute List extension for Python-Markdown """ def extendMarkdown(self, md): md.treeprocessors.register(AttrListTreeprocessor(md), 'attr_list', 8) md.registerExtension(self) def makeExtension(**kwargs): # pragma: no cover return AttrListExtension(**kwargs)