try:
    # Available in Python 3
    from tokenize import open as open_py_source

except ImportError:
    # Copied from python3 tokenize
    from codecs import lookup, BOM_UTF8
    import re
    from io import TextIOWrapper, open
    cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)")

    def _get_normal_name(orig_enc):
        """Imitates get_normal_name in tokenizer.c."""
        # Only care about the first 12 characters.
        enc = orig_enc[:12].lower().replace("_", "-")
        if enc == "utf-8" or enc.startswith("utf-8-"):
            return "utf-8"
        if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
           enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
            return "iso-8859-1"
        return orig_enc
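
    # Illustrative examples (added for clarity, not in the original):
    # _get_normal_name('UTF_8') returns 'utf-8', _get_normal_name('Latin-1')
    # returns 'iso-8859-1', and unrecognised names such as 'cp1252' are
    # passed through unchanged.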

    def _detect_encoding(readline):
        """
        The detect_encoding() function is used to detect the encoding that
        should be used to decode a Python source file. It requires one
        argument, readline, in the same way as the tokenize() generator.

        It will call readline a maximum of twice, and return the encoding used
        (as a string) and a list of any lines (left as bytes) it has read in.

        It detects the encoding from the presence of a utf-8 bom or an encoding
        cookie as specified in pep-0263. If both a bom and a cookie are
        present, but disagree, a SyntaxError will be raised. If the encoding
        cookie is an invalid charset, raise a SyntaxError. Note that if a
        utf-8 bom is found, 'utf-8-sig' is returned.

        If no encoding is specified, then the default of 'utf-8' will be
        returned.
        """
        bom_found = False
        encoding = None
        default = 'utf-8'

        def read_or_stop():
            try:
                return readline()
            except StopIteration:
                return b''

        def find_cookie(line):
            try:
                line_string = line.decode('ascii')
            except UnicodeDecodeError:
                return None

            matches = cookie_re.findall(line_string)
            if not matches:
                return None
            encoding = _get_normal_name(matches[0])
            try:
                codec = lookup(encoding)
            except LookupError:
                # This behaviour mimics the Python interpreter
                raise SyntaxError("unknown encoding: " + encoding)

            if bom_found:
                if codec.name != 'utf-8':
                    # This behaviour mimics the Python interpreter
                    raise SyntaxError('encoding problem: utf-8')
                encoding += '-sig'
            return encoding

        first = read_or_stop()
        if first.startswith(BOM_UTF8):
            # A UTF-8 BOM fixes the default and must agree with any cookie.
            bom_found = True
            first = first[3:]
            default = 'utf-8-sig'
        if not first:
            return default, []

        encoding = find_cookie(first)
        if encoding:
            return encoding, [first]

        # The cookie may also appear on the second line (e.g. after a shebang).
        second = read_or_stop()
        if not second:
            return default, [first]

        encoding = find_cookie(second)
        if encoding:
            return encoding, [first, second]

        return default, [first, second]
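
    # Illustrative behaviour (added for clarity, not in the original): given a
    # readline that first yields b"# -*- coding: latin-1 -*-\n", the call
    # _detect_encoding(readline) returns
    # ('iso-8859-1', [b"# -*- coding: latin-1 -*-\n"]) after a single
    # readline() call; with no BOM and no cookie in the first two lines it
    # falls back to ('utf-8', [first, second]).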

    def open_py_source(filename):
        """Open a file in read only mode using the encoding detected by
        detect_encoding().
        """
        buffer = open(filename, 'rb')
        encoding, lines = _detect_encoding(buffer.readline)
        buffer.seek(0)
        text = TextIOWrapper(buffer, encoding, line_buffering=True)
        text.mode = 'r'
        return text
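

if __name__ == '__main__':
    # Illustrative usage sketch, not part of the original module: write a
    # small temporary source file that declares a PEP 263 coding cookie and
    # read it back with open_py_source(), which decodes the text using the
    # detected encoding. The file contents here are made up for the example.
    import os
    import tempfile

    fd, path = tempfile.mkstemp(suffix='.py')
    try:
        with os.fdopen(fd, 'wb') as f:
            # 0xe9 is an accented 'e' in latin-1; the cookie tells the reader
            # how to decode it.
            f.write(b"# -*- coding: latin-1 -*-\nname = '\xe9'\n")
        with open_py_source(path) as source:
            print(source.read())  # decoded as iso-8859-1, accent intact
    finally:
        os.remove(path)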