harvesting_the_net/pics/lib/python3.10/site-packages/jedi/inference/references.py

import os
import re

from parso import python_bytes_to_unicode

from jedi.debug import dbg
from jedi.file_io import KnownContentFileIO, FolderIO
from jedi.inference.names import SubModuleName
from jedi.inference.imports import load_module_from_path
from jedi.inference.filters import ParserTreeFilter
from jedi.inference.gradual.conversion import convert_names

_IGNORE_FOLDERS = ('.tox', '.venv', '.mypy_cache', 'venv', '__pycache__')

_OPENED_FILE_LIMIT = 2000
"""
Stats from a 2016 Lenovo Notebook running Linux:
With os.walk, it takes about 10s to scan 11'000 files (without filesystem
caching). Once cached it only takes 5s. So it is expected that reading all
those files might take a few seconds, but not a lot more.
"""
_PARSED_FILE_LIMIT = 30
"""
For now we keep the amount of parsed files really low, since parsing might take
easily 100ms for bigger files.
"""


def _resolve_names(definition_names, avoid_names=()):
    for name in definition_names:
        if name in avoid_names:
            # Avoiding recursions here, because goto on a module name lands
            # on the same module.
            continue

        if not isinstance(name, SubModuleName):
            # SubModuleNames are not actually existing names but created
            # names when importing something like `import foo.bar.baz`.
            yield name

        if name.api_type == 'module':
            yield from _resolve_names(name.goto(), definition_names)


def _dictionarize(names):
    return dict(
        (n if n.tree_name is None else n.tree_name, n)
        for n in names
    )


def _find_defining_names(module_context, tree_name):
    found_names = _find_names(module_context, tree_name)

    for name in list(found_names):
        # Convert from/to stubs, because those might also be usages.
        found_names |= set(convert_names(
            [name],
            only_stubs=not name.get_root_context().is_stub(),
            prefer_stub_to_compiled=False
        ))

    found_names |= set(_find_global_variables(found_names, tree_name.value))
    for name in list(found_names):
        if name.api_type == 'param' or name.tree_name is None \
                or name.tree_name.parent.type == 'trailer':
            continue
        found_names |= set(_add_names_in_same_context(name.parent_context, name.string_name))
    return set(_resolve_names(found_names))


def _find_names(module_context, tree_name):
    name = module_context.create_name(tree_name)
    found_names = set(name.goto())
    found_names.add(name)

    return set(_resolve_names(found_names))


def _add_names_in_same_context(context, string_name):
    if context.tree_node is None:
        return

    until_position = None
    while True:
        filter_ = ParserTreeFilter(
            parent_context=context,
            until_position=until_position,
        )
        names = set(filter_.get(string_name))
        if not names:
            break
        yield from names
        ordered = sorted(names, key=lambda x: x.start_pos)
        until_position = ordered[0].start_pos


def _find_global_variables(names, search_name):
    for name in names:
        if name.tree_name is None:
            continue
        module_context = name.get_root_context()
        try:
            method = module_context.get_global_filter
        except AttributeError:
            continue
        else:
            for global_name in method().get(search_name):
                yield global_name
                c = module_context.create_context(global_name.tree_name)
                yield from _add_names_in_same_context(c, global_name.string_name)


def find_references(module_context, tree_name, only_in_module=False):
    inf = module_context.inference_state
    search_name = tree_name.value

    # We disable flow analysis, because if we have ifs that are only true in
    # certain cases, we want both sides.
    try:
        inf.flow_analysis_enabled = False
        found_names = _find_defining_names(module_context, tree_name)
    finally:
        inf.flow_analysis_enabled = True

    found_names_dct = _dictionarize(found_names)

    module_contexts = [module_context]
    if not only_in_module:
        for m in set(d.get_root_context() for d in found_names):
            if m != module_context and m.tree_node is not None \
                    and inf.project.path in m.py__file__().parents:
                module_contexts.append(m)
    # For param no search for other modules is necessary.
    if only_in_module or any(n.api_type == 'param' for n in found_names):
        potential_modules = module_contexts
    else:
        potential_modules = get_module_contexts_containing_name(
            inf,
            module_contexts,
            search_name,
        )

    non_matching_reference_maps = {}
    for module_context in potential_modules:
        for name_leaf in module_context.tree_node.get_used_names().get(search_name, []):
            new = _dictionarize(_find_names(module_context, name_leaf))
            if any(tree_name in found_names_dct for tree_name in new):
                found_names_dct.update(new)
                for tree_name in new:
                    for dct in non_matching_reference_maps.get(tree_name, []):
                        # A reference that was previously searched for matches
                        # with a now found name. Merge.
                        found_names_dct.update(dct)
                    try:
                        del non_matching_reference_maps[tree_name]
                    except KeyError:
                        pass
            else:
                for name in new:
                    non_matching_reference_maps.setdefault(name, []).append(new)
    result = found_names_dct.values()
    if only_in_module:
        return [n for n in result if n.get_root_context() == module_context]
    return result


def _check_fs(inference_state, file_io, regex):
    try:
        code = file_io.read()
    except FileNotFoundError:
        return None
    code = python_bytes_to_unicode(code, errors='replace')
    if not regex.search(code):
        return None
    new_file_io = KnownContentFileIO(file_io.path, code)
    m = load_module_from_path(inference_state, new_file_io)
    if m.is_compiled():
        return None
    return m.as_context()


def gitignored_paths(folder_io, file_io):
    ignored_paths_abs = set()
    ignored_paths_rel = set()

    for l in file_io.read().splitlines():
        if not l or l.startswith(b'#') or l.startswith(b'!') or b'*' in l:
            continue

        p = l.decode('utf-8', 'ignore').rstrip('/')
        if '/' in p:
            name = p.lstrip('/')
            ignored_paths_abs.add(os.path.join(folder_io.path, name))
        else:
            name = p
            ignored_paths_rel.add((folder_io.path, name))

    return ignored_paths_abs, ignored_paths_rel


def expand_relative_ignore_paths(folder_io, relative_paths):
    curr_path = folder_io.path
    return {os.path.join(curr_path, p[1]) for p in relative_paths if curr_path.startswith(p[0])}


def recurse_find_python_folders_and_files(folder_io, except_paths=()):
    except_paths = set(except_paths)
    except_paths_relative = set()

    for root_folder_io, folder_ios, file_ios in folder_io.walk():
        # Delete folders that we don't want to iterate over.
        for file_io in file_ios:
            path = file_io.path
            if path.suffix in ('.py', '.pyi'):
                if path not in except_paths:
                    yield None, file_io

            if path.name == '.gitignore':
                ignored_paths_abs, ignored_paths_rel = gitignored_paths(
                    root_folder_io, file_io
                )
                except_paths |= ignored_paths_abs
                except_paths_relative |= ignored_paths_rel

        except_paths_relative_expanded = expand_relative_ignore_paths(
            root_folder_io, except_paths_relative
        )

        folder_ios[:] = [
            folder_io
            for folder_io in folder_ios
            if folder_io.path not in except_paths
            and folder_io.path not in except_paths_relative_expanded
            and folder_io.get_base_name() not in _IGNORE_FOLDERS
        ]
        for folder_io in folder_ios:
            yield folder_io, None


def recurse_find_python_files(folder_io, except_paths=()):
    for folder_io, file_io in recurse_find_python_folders_and_files(folder_io, except_paths):
        if file_io is not None:
            yield file_io


def _find_python_files_in_sys_path(inference_state, module_contexts):
    sys_path = inference_state.get_sys_path()
    except_paths = set()
    yielded_paths = [m.py__file__() for m in module_contexts]
    for module_context in module_contexts:
        file_io = module_context.get_value().file_io
        if file_io is None:
            continue

        folder_io = file_io.get_parent_folder()
        while True:
            path = folder_io.path
            if not any(path.startswith(p) for p in sys_path) or path in except_paths:
                break
            for file_io in recurse_find_python_files(folder_io, except_paths):
                if file_io.path not in yielded_paths:
                    yield file_io
            except_paths.add(path)
            folder_io = folder_io.get_parent_folder()


def _find_project_modules(inference_state, module_contexts):
    except_ = [m.py__file__() for m in module_contexts]
    yield from recurse_find_python_files(FolderIO(inference_state.project.path), except_)


def get_module_contexts_containing_name(inference_state, module_contexts, name,
                                        limit_reduction=1):
    """
    Search a name in the directories of modules.

    :param limit_reduction: Divides the limits on opening/parsing files by this
        factor.
    """
    # Skip non python modules
    for module_context in module_contexts:
        if module_context.is_compiled():
            continue
        yield module_context

    # Very short names are not searched in other modules for now to avoid lots
    # of file lookups.
    if len(name) <= 2:
        return

    # Currently not used, because there's only `scope=project` and `scope=file`
    # At the moment there is no such thing as `scope=sys.path`.
    # file_io_iterator = _find_python_files_in_sys_path(inference_state, module_contexts)
    file_io_iterator = _find_project_modules(inference_state, module_contexts)
    yield from search_in_file_ios(inference_state, file_io_iterator, name,
                                  limit_reduction=limit_reduction)


def search_in_file_ios(inference_state, file_io_iterator, name,
                       limit_reduction=1, complete=False):
    parse_limit = _PARSED_FILE_LIMIT / limit_reduction
    open_limit = _OPENED_FILE_LIMIT / limit_reduction
    file_io_count = 0
    parsed_file_count = 0
    regex = re.compile(r'\b' + re.escape(name) + (r'' if complete else r'\b'))
    for file_io in file_io_iterator:
        file_io_count += 1
        m = _check_fs(inference_state, file_io, regex)
        if m is not None:
            parsed_file_count += 1
            yield m
            if parsed_file_count >= parse_limit:
                dbg('Hit limit of parsed files: %s', parse_limit)
                break

        if file_io_count >= open_limit:
            dbg('Hit limit of opened files: %s', open_limit)
            break
end 2 years ago			`import os`
			`import re`

			`from parso import python_bytes_to_unicode`

			`from jedi.debug import dbg`
			`from jedi.file_io import KnownContentFileIO, FolderIO`
			`from jedi.inference.names import SubModuleName`
			`from jedi.inference.imports import load_module_from_path`
			`from jedi.inference.filters import ParserTreeFilter`
			`from jedi.inference.gradual.conversion import convert_names`

			`_IGNORE_FOLDERS = ('.tox', '.venv', '.mypy_cache', 'venv', '__pycache__')`

			`_OPENED_FILE_LIMIT = 2000`
			`"""`
			`Stats from a 2016 Lenovo Notebook running Linux:`
			`With os.walk, it takes about 10s to scan 11'000 files (without filesystem`
			`caching). Once cached it only takes 5s. So it is expected that reading all`
			`those files might take a few seconds, but not a lot more.`
			`"""`
			`_PARSED_FILE_LIMIT = 30`
			`"""`
			`For now we keep the amount of parsed files really low, since parsing might take`
			`easily 100ms for bigger files.`
			`"""`


			`def _resolve_names(definition_names, avoid_names=()):`
			`for name in definition_names:`
			`if name in avoid_names:`
			`# Avoiding recursions here, because goto on a module name lands`
			`# on the same module.`
			`continue`

			`if not isinstance(name, SubModuleName):`
			`# SubModuleNames are not actually existing names but created`
			# names when importing something like `import foo.bar.baz`.
			`yield name`

			`if name.api_type == 'module':`
			`yield from _resolve_names(name.goto(), definition_names)`


			`def _dictionarize(names):`
			`return dict(`
			`(n if n.tree_name is None else n.tree_name, n)`
			`for n in names`
			`)`


			`def _find_defining_names(module_context, tree_name):`
			`found_names = _find_names(module_context, tree_name)`

			`for name in list(found_names):`
			`# Convert from/to stubs, because those might also be usages.`
			`found_names \|= set(convert_names(`
			`[name],`
			`only_stubs=not name.get_root_context().is_stub(),`
			`prefer_stub_to_compiled=False`
			`))`

			`found_names \|= set(_find_global_variables(found_names, tree_name.value))`
			`for name in list(found_names):`
			`if name.api_type == 'param' or name.tree_name is None \`
			`or name.tree_name.parent.type == 'trailer':`
			`continue`
			`found_names \|= set(_add_names_in_same_context(name.parent_context, name.string_name))`
			`return set(_resolve_names(found_names))`


			`def _find_names(module_context, tree_name):`
			`name = module_context.create_name(tree_name)`
			`found_names = set(name.goto())`
			`found_names.add(name)`

			`return set(_resolve_names(found_names))`


			`def _add_names_in_same_context(context, string_name):`
			`if context.tree_node is None:`
			`return`

			`until_position = None`
			`while True:`
			`filter_ = ParserTreeFilter(`
			`parent_context=context,`
			`until_position=until_position,`
			`)`
			`names = set(filter_.get(string_name))`
			`if not names:`
			`break`
			`yield from names`
			`ordered = sorted(names, key=lambda x: x.start_pos)`
			`until_position = ordered[0].start_pos`


			`def _find_global_variables(names, search_name):`
			`for name in names:`
			`if name.tree_name is None:`
			`continue`
			`module_context = name.get_root_context()`
			`try:`
			`method = module_context.get_global_filter`
			`except AttributeError:`
			`continue`
			`else:`
			`for global_name in method().get(search_name):`
			`yield global_name`
			`c = module_context.create_context(global_name.tree_name)`
			`yield from _add_names_in_same_context(c, global_name.string_name)`


			`def find_references(module_context, tree_name, only_in_module=False):`
			`inf = module_context.inference_state`
			`search_name = tree_name.value`

			`# We disable flow analysis, because if we have ifs that are only true in`
			`# certain cases, we want both sides.`
			`try:`
			`inf.flow_analysis_enabled = False`
			`found_names = _find_defining_names(module_context, tree_name)`
			`finally:`
			`inf.flow_analysis_enabled = True`

			`found_names_dct = _dictionarize(found_names)`

			`module_contexts = [module_context]`
			`if not only_in_module:`
			`for m in set(d.get_root_context() for d in found_names):`
			`if m != module_context and m.tree_node is not None \`
			`and inf.project.path in m.py__file__().parents:`
			`module_contexts.append(m)`
			`# For param no search for other modules is necessary.`
			`if only_in_module or any(n.api_type == 'param' for n in found_names):`
			`potential_modules = module_contexts`
			`else:`
			`potential_modules = get_module_contexts_containing_name(`
			`inf,`
			`module_contexts,`
			`search_name,`
			`)`

			`non_matching_reference_maps = {}`
			`for module_context in potential_modules:`
			`for name_leaf in module_context.tree_node.get_used_names().get(search_name, []):`
			`new = _dictionarize(_find_names(module_context, name_leaf))`
			`if any(tree_name in found_names_dct for tree_name in new):`
			`found_names_dct.update(new)`
			`for tree_name in new:`
			`for dct in non_matching_reference_maps.get(tree_name, []):`
			`# A reference that was previously searched for matches`
			`# with a now found name. Merge.`
			`found_names_dct.update(dct)`
			`try:`
			`del non_matching_reference_maps[tree_name]`
			`except KeyError:`
			`pass`
			`else:`
			`for name in new:`
			`non_matching_reference_maps.setdefault(name, []).append(new)`
			`result = found_names_dct.values()`
			`if only_in_module:`
			`return [n for n in result if n.get_root_context() == module_context]`
			`return result`


			`def _check_fs(inference_state, file_io, regex):`
			`try:`
			`code = file_io.read()`
			`except FileNotFoundError:`
			`return None`
			`code = python_bytes_to_unicode(code, errors='replace')`
			`if not regex.search(code):`
			`return None`
			`new_file_io = KnownContentFileIO(file_io.path, code)`
			`m = load_module_from_path(inference_state, new_file_io)`
			`if m.is_compiled():`
			`return None`
			`return m.as_context()`


			`def gitignored_paths(folder_io, file_io):`
			`ignored_paths_abs = set()`
			`ignored_paths_rel = set()`

			`for l in file_io.read().splitlines():`
			`if not l or l.startswith(b'#') or l.startswith(b'!') or b'*' in l:`
			`continue`

			`p = l.decode('utf-8', 'ignore').rstrip('/')`
			`if '/' in p:`
			`name = p.lstrip('/')`
			`ignored_paths_abs.add(os.path.join(folder_io.path, name))`
			`else:`
			`name = p`
			`ignored_paths_rel.add((folder_io.path, name))`

			`return ignored_paths_abs, ignored_paths_rel`


			`def expand_relative_ignore_paths(folder_io, relative_paths):`
			`curr_path = folder_io.path`
			`return {os.path.join(curr_path, p[1]) for p in relative_paths if curr_path.startswith(p[0])}`


			`def recurse_find_python_folders_and_files(folder_io, except_paths=()):`
			`except_paths = set(except_paths)`
			`except_paths_relative = set()`

			`for root_folder_io, folder_ios, file_ios in folder_io.walk():`
			`# Delete folders that we don't want to iterate over.`
			`for file_io in file_ios:`
			`path = file_io.path`
			`if path.suffix in ('.py', '.pyi'):`
			`if path not in except_paths:`
			`yield None, file_io`

			`if path.name == '.gitignore':`
			`ignored_paths_abs, ignored_paths_rel = gitignored_paths(`
			`root_folder_io, file_io`
			`)`
			`except_paths \|= ignored_paths_abs`
			`except_paths_relative \|= ignored_paths_rel`

			`except_paths_relative_expanded = expand_relative_ignore_paths(`
			`root_folder_io, except_paths_relative`
			`)`

			`folder_ios[:] = [`
			`folder_io`
			`for folder_io in folder_ios`
			`if folder_io.path not in except_paths`
			`and folder_io.path not in except_paths_relative_expanded`
			`and folder_io.get_base_name() not in _IGNORE_FOLDERS`
			`]`
			`for folder_io in folder_ios:`
			`yield folder_io, None`


			`def recurse_find_python_files(folder_io, except_paths=()):`
			`for folder_io, file_io in recurse_find_python_folders_and_files(folder_io, except_paths):`
			`if file_io is not None:`
			`yield file_io`


			`def _find_python_files_in_sys_path(inference_state, module_contexts):`
			`sys_path = inference_state.get_sys_path()`
			`except_paths = set()`
			`yielded_paths = [m.py__file__() for m in module_contexts]`
			`for module_context in module_contexts:`
			`file_io = module_context.get_value().file_io`
			`if file_io is None:`
			`continue`

			`folder_io = file_io.get_parent_folder()`
			`while True:`
			`path = folder_io.path`
			`if not any(path.startswith(p) for p in sys_path) or path in except_paths:`
			`break`
			`for file_io in recurse_find_python_files(folder_io, except_paths):`
			`if file_io.path not in yielded_paths:`
			`yield file_io`
			`except_paths.add(path)`
			`folder_io = folder_io.get_parent_folder()`


			`def _find_project_modules(inference_state, module_contexts):`
			`except_ = [m.py__file__() for m in module_contexts]`
			`yield from recurse_find_python_files(FolderIO(inference_state.project.path), except_)`


			`def get_module_contexts_containing_name(inference_state, module_contexts, name,`
			`limit_reduction=1):`
			`"""`
			`Search a name in the directories of modules.`

			`:param limit_reduction: Divides the limits on opening/parsing files by this`
			`factor.`
			`"""`
			`# Skip non python modules`
			`for module_context in module_contexts:`
			`if module_context.is_compiled():`
			`continue`
			`yield module_context`

			`# Very short names are not searched in other modules for now to avoid lots`
			`# of file lookups.`
			`if len(name) <= 2:`
			`return`

			# Currently not used, because there's only `scope=project` and `scope=file`
			# At the moment there is no such thing as `scope=sys.path`.
			`# file_io_iterator = _find_python_files_in_sys_path(inference_state, module_contexts)`
			`file_io_iterator = _find_project_modules(inference_state, module_contexts)`
			`yield from search_in_file_ios(inference_state, file_io_iterator, name,`
			`limit_reduction=limit_reduction)`


			`def search_in_file_ios(inference_state, file_io_iterator, name,`
			`limit_reduction=1, complete=False):`
			`parse_limit = _PARSED_FILE_LIMIT / limit_reduction`
			`open_limit = _OPENED_FILE_LIMIT / limit_reduction`
			`file_io_count = 0`
			`parsed_file_count = 0`
			`regex = re.compile(r'\b' + re.escape(name) + (r'' if complete else r'\b'))`
			`for file_io in file_io_iterator:`
			`file_io_count += 1`
			`m = _check_fs(inference_state, file_io, regex)`
			`if m is not None:`
			`parsed_file_count += 1`
			`yield m`
			`if parsed_file_count >= parse_limit:`
			`dbg('Hit limit of parsed files: %s', parse_limit)`
			`break`

			`if file_io_count >= open_limit:`
			`dbg('Hit limit of opened files: %s', open_limit)`
			`break`