harvesting_the_net/pics/lib/python3.10/site-packages/pygments/lexers/bibtex.py

"""
    pygments.lexers.bibtex
    ~~~~~~~~~~~~~~~~~~~~~~

    Lexers for BibTeX bibliography data and styles

    :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import RegexLexer, ExtendedRegexLexer, include, default, \
    words
from pygments.token import Name, Comment, String, Error, Number, Text, \
    Keyword, Punctuation, Whitespace

# Names exported by ``from <this module> import *``: the two lexer classes
# defined below.
__all__ = ['BibTeXLexer', 'BSTLexer']


class BibTeXLexer(ExtendedRegexLexer):
    """
    Lexer for the BibTeX bibliography data format (``*.bib`` files).

    Every ``@command`` found in the ``root`` state pushes three states: one
    that consumes the opening delimiter, one for the command's body and one
    that consumes the closing delimiter.  The two delimiter callbacks keep
    the opening delimiter on the lexer context so that a closing delimiter
    of the wrong kind can be reported as an error token.

    .. versionadded:: 2.2
    """

    name = 'BibTeX'
    aliases = ['bibtex', 'bib']
    filenames = ['*.bib']
    mimetypes = ["text/x-bibtex"]
    flags = re.IGNORECASE

    # Punctuation accepted inside identifiers, already escaped for use
    # inside a regex character class.
    ALLOWED_CHARS = r'@!$&*+\-./:;<>?\[\\\]^`|~'
    # Two character classes: the first character may be ``a-z``, ``_`` or
    # allowed punctuation; the following characters may additionally be any
    # ``\w`` character (which includes digits).
    IDENTIFIER = '[a-z_' + ALLOWED_CHARS + '][' + r'\w' + ALLOWED_CHARS + ']*'

    def open_brace_callback(self, match, ctx):
        """
        Emit the opening delimiter as punctuation and store it on *ctx*
        (as ``ctx.opening_brace``) for the matching close callback.
        """
        delimiter = match.group()
        ctx.opening_brace = delimiter
        yield match.start(), Punctuation, delimiter
        # Callbacks of an extended lexer move the position themselves.
        ctx.pos = match.end()

    def close_brace_callback(self, match, ctx):
        """
        Emit the closing delimiter: as ``Error`` when it does not pair up
        with the opening delimiter stored on *ctx*, else as ``Punctuation``.
        The stored opening delimiter is removed afterwards.
        """
        delimiter = match.group()
        opener = ctx.opening_brace
        if opener == '{':
            mismatched = delimiter != '}'
        elif opener == '(':
            mismatched = delimiter != ')'
        else:
            mismatched = False

        token_type = Error if mismatched else Punctuation
        yield match.start(), token_type, delimiter

        del ctx.opening_brace
        ctx.pos = match.end()

    tokens = {
        # Top level: commands start with "@"; any other text is a comment.
        'root': [
            include('whitespace'),
            # "@comment", unless it is the start of a longer "@commentary..."
            (r'@comment(?!ary)', Comment),
            # State tuples are pushed left to right, so the last entry is
            # the state that becomes active first.
            ('@preamble', Name.Class, ('closing-brace', 'value', 'opening-brace')),
            ('@string', Name.Class, ('closing-brace', 'field', 'opening-brace')),
            ('@' + IDENTIFIER, Name.Class,
             ('closing-brace', 'command-body', 'opening-brace')),
            ('.+', Comment),
        ],

        # The delimiter that opens a command body: "{" or "(".
        'opening-brace': [
            include('whitespace'),
            (r'[{(]', open_brace_callback, '#pop'),
        ],

        # The delimiter that closes a command body: "}" or ")".
        'closing-brace': [
            include('whitespace'),
            (r'[})]', close_brace_callback, '#pop'),
        ],

        # Body of a generic entry: a label, then the field list.
        'command-body': [
            include('whitespace'),
            (r'[^\s\,\}]+', Name.Label, ('#pop', 'fields')),
        ],

        # Field list: each comma introduces one more field.
        'fields': [
            include('whitespace'),
            (',', Punctuation, 'field'),
            default('#pop'),
        ],

        # A single "name = value" field ("=" is consumed before the value).
        'field': [
            include('whitespace'),
            (IDENTIFIER, Name.Attribute, ('value', '=')),
            default('#pop'),
        ],

        '=': [
            include('whitespace'),
            ('=', Punctuation, '#pop'),
        ],

        # A value: identifiers, strings and numbers, possibly joined by "#".
        'value': [
            include('whitespace'),
            (IDENTIFIER, Name.Variable),
            ('"', String, 'quoted-string'),
            (r'\{', String, 'braced-string'),
            (r'[\d]+', Number),
            ('#', Punctuation),
            default('#pop'),
        ],

        # "..." string; a "{" inside it opens a nested braced string.
        'quoted-string': [
            (r'\{', String, 'braced-string'),
            ('"', String, '#pop'),
            (r'[^\{\"]+', String),
        ],

        # {...} string; nested braces are tracked by re-pushing this state.
        'braced-string': [
            (r'\{', String, '#push'),
            (r'\}', String, '#pop'),
            (r'[^\{\}]+', String),
        ],

        'whitespace': [
            (r'\s+', Whitespace),
        ],
    }


class BSTLexer(RegexLexer):
    """
    Lexer for BibTeX bibliography style files (``*.bst``).

    The top-level commands are recognised as keywords; each one pushes as
    many ``group`` states as the number of ``{...}`` groups listed for it
    in the ``root`` rules.

    .. versionadded:: 2.2
    """

    name = 'BST'
    aliases = ['bst', 'bst-pybtex']
    filenames = ['*.bst']
    flags = re.IGNORECASE | re.MULTILINE

    tokens = {
        # Top-level commands, grouped by the number of groups that follow.
        'root': [
            include('whitespace'),
            # No group pushed.
            (words(['read', 'sort']), Keyword),
            # One group.
            (words(['execute', 'integers', 'iterate', 'reverse', 'strings']),
             Keyword, 'group'),
            # Two groups.
            (words(['function', 'macro']), Keyword, ('group', 'group')),
            # Three groups.
            (words(['entry']), Keyword, ('group', 'group', 'group')),
        ],

        # One "{...}" group: on "{", swap this state for the body and the
        # state that consumes the closing "}".
        'group': [
            include('whitespace'),
            (r'\{', Punctuation, ('#pop', 'group-end', 'body')),
        ],

        'group-end': [
            include('whitespace'),
            (r'\}', Punctuation, '#pop'),
        ],

        # Contents of a group.  Rule order matters: the more specific name
        # patterns come before the generic one.
        'body': [
            include('whitespace'),
            # Name preceded by a single quote.
            (r"\'[^#\"\{\}\s]+", Name.Function),
            # Name ending in "$".
            (r'[^#\"\{\}\s]+\$', Name.Builtin),
            # Any other name.
            (r'[^#\"\{\}\s]+', Name.Variable),
            (r'"[^\"]*"', String),
            # Integer literal: "#", optional minus sign, digits.
            (r'#-?\d+', Number),
            # Nested group.
            (r'\{', Punctuation, ('group-end', 'body')),
            default('#pop'),
        ],

        # Whitespace and "%" comments running to the end of the line.
        'whitespace': [
            (r'\s+', Whitespace),
            ('%.*?$', Comment.Single),
        ],
    }