import time
import os
import sys
import hashlib
import gc
import shutil
import platform
import logging
import warnings
import pickle
from pathlib import Path
from typing import Dict, Any

LOG = logging.getLogger(__name__)

_CACHED_FILE_MINIMUM_SURVIVAL = 60 * 10  # 10 minutes
"""
Cached files should survive at least a few minutes.
"""

_CACHED_FILE_MAXIMUM_SURVIVAL = 60 * 60 * 24 * 30
"""
Maximum time for a cached file to survive if it is not
accessed within that time.
"""

_CACHED_SIZE_TRIGGER = 600
"""
This setting limits the number of cached files. It's basically a way to
trigger garbage collection.

The reasoning for this limit being as big as it is, is the following:

Numpy, Pandas, Matplotlib and Tensorflow together use about 500 files. This
makes Jedi use ~500 MB of memory. Since we might want a bit more than those
few libraries, we just increase it a bit.
"""

_PICKLE_VERSION = 33
"""
Version number (integer) for the file system cache.

Increment this number when there are any incompatible changes in
the parser tree classes. For example, the following changes
are regarded as incompatible:

- A class name is changed.
- A class is moved to another module.
- A ``__slots__`` entry of a class is changed.
"""

_VERSION_TAG = '%s-%s%s-%s' % (
    platform.python_implementation(),
    sys.version_info[0],
    sys.version_info[1],
    _PICKLE_VERSION
)
"""
Short name for distinguishing Python implementations and versions.

It's a bit similar to `sys.implementation.cache_tag`.
See: https://docs.python.org/3/library/sys.html#sys.implementation
"""


def _get_default_cache_path():
    if platform.system().lower() == 'windows':
        dir_ = Path(os.getenv('LOCALAPPDATA') or '~', 'Parso', 'Parso')
    elif platform.system().lower() == 'darwin':
        dir_ = Path('~', 'Library', 'Caches', 'Parso')
    else:
        dir_ = Path(os.getenv('XDG_CACHE_HOME') or '~/.cache', 'parso')
    return dir_.expanduser()


_default_cache_path = _get_default_cache_path()
"""
The path where the cache is stored.

On Linux, this defaults to ``~/.cache/parso/``, on OS X to
``~/Library/Caches/Parso/`` and on Windows to ``%LOCALAPPDATA%\\Parso\\Parso\\``.
On Linux, if the environment variable ``$XDG_CACHE_HOME`` is set,
``$XDG_CACHE_HOME/parso`` is used instead of the default one.
"""

_CACHE_CLEAR_THRESHOLD = 60 * 60 * 24  # Clear inactive cache files at most once a day.


def _get_cache_clear_lock_path(cache_path=None):
    """
    The path where the cache lock is stored.

    The cache lock prevents continuous cache clearing and only allows
    garbage collection once a day (configurable via _CACHE_CLEAR_THRESHOLD).
    """
    cache_path = cache_path or _default_cache_path
    return cache_path.joinpath("PARSO-CACHE-LOCK")


parser_cache: Dict[str, Any] = {}
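# Maps a grammar hash to a dict of {path: _NodeCacheItem}; this is the
# in-memory layer of the cache, filled by _set_cache_item() below.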


class _NodeCacheItem:
    def __init__(self, node, lines, change_time=None):
        self.node = node
        self.lines = lines
        if change_time is None:
            change_time = time.time()
        self.change_time = change_time
        self.last_used = change_time


def load_module(hashed_grammar, file_io, cache_path=None):
    """
    Returns a module or None, if it fails.
    """
    p_time = file_io.get_last_modified()
    if p_time is None:
        return None

    try:
        module_cache_item = parser_cache[hashed_grammar][file_io.path]
        if p_time <= module_cache_item.change_time:
            module_cache_item.last_used = time.time()
            return module_cache_item.node
    except KeyError:
        return _load_from_file_system(
            hashed_grammar,
            file_io.path,
            p_time,
            cache_path=cache_path
        )
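# Lookup order, for orientation: the in-memory parser_cache is tried first; on
# a KeyError the pickled on-disk cache is consulted. If the source file is
# newer than the cached item, None is returned and the caller must reparse.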


def _load_from_file_system(hashed_grammar, path, p_time, cache_path=None):
    cache_path = _get_hashed_path(hashed_grammar, path, cache_path=cache_path)
    try:
        if p_time > os.path.getmtime(cache_path):
            # Cache is outdated.
            return None

        with open(cache_path, 'rb') as f:
            # Disabling the garbage collector while unpickling noticeably
            # speeds up loading large parser trees.
            gc.disable()
            try:
                module_cache_item = pickle.load(f)
            finally:
                gc.enable()
    except FileNotFoundError:
        return None
    else:
        _set_cache_item(hashed_grammar, path, module_cache_item)
        LOG.debug('pickle loaded: %s', path)
        return module_cache_item.node


def _set_cache_item(hashed_grammar, path, module_cache_item):
    if sum(len(v) for v in parser_cache.values()) >= _CACHED_SIZE_TRIGGER:
        # Garbage collection of old in-memory cache items: throw everything
        # away that hasn't been accessed in the last 10 minutes.
        cutoff_time = time.time() - _CACHED_FILE_MINIMUM_SURVIVAL
        for key, path_to_item_map in parser_cache.items():
            parser_cache[key] = {
                path: node_item
                for path, node_item in path_to_item_map.items()
                if node_item.last_used > cutoff_time
            }

    parser_cache.setdefault(hashed_grammar, {})[path] = module_cache_item
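# Note that eviction is size-triggered rather than periodic: once the total
# number of in-memory items reaches _CACHED_SIZE_TRIGGER (600), everything
# not used within _CACHED_FILE_MINIMUM_SURVIVAL (10 minutes) is dropped.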


def try_to_save_module(hashed_grammar, file_io, module, lines, pickling=True, cache_path=None):
    path = file_io.path
    try:
        p_time = None if path is None else file_io.get_last_modified()
    except OSError:
        p_time = None
        pickling = False

    item = _NodeCacheItem(module, lines, p_time)
    _set_cache_item(hashed_grammar, path, item)
    if pickling and path is not None:
        try:
            _save_to_file_system(hashed_grammar, path, item, cache_path=cache_path)
        except PermissionError:
            # It's not really a big issue if the cache cannot be saved to the
            # file system. It's still in RAM in that case. However we should
            # still warn the user that this is happening.
            warnings.warn(
                'Tried to save a file to %s, but got permission denied.' % path,
                Warning
            )
        else:
            _remove_cache_and_update_lock(cache_path=cache_path)


def _save_to_file_system(hashed_grammar, path, item, cache_path=None):
    with open(_get_hashed_path(hashed_grammar, path, cache_path=cache_path), 'wb') as f:
        pickle.dump(item, f, pickle.HIGHEST_PROTOCOL)


def clear_cache(cache_path=None):
    if cache_path is None:
        cache_path = _default_cache_path
    shutil.rmtree(cache_path)
    parser_cache.clear()


def clear_inactive_cache(
    cache_path=None,
    inactivity_threshold=_CACHED_FILE_MAXIMUM_SURVIVAL,
):
    if cache_path is None:
        cache_path = _default_cache_path
    if not cache_path.exists():
        return False
    for dirname in os.listdir(cache_path):
        version_path = cache_path.joinpath(dirname)
        if not version_path.is_dir():
            continue
        for file in os.scandir(version_path):
            # Remove files whose last access is older than the threshold.
            if file.stat().st_atime + inactivity_threshold <= time.time():
                try:
                    os.remove(file.path)
                except OSError:  # Silently ignore all failures.
                    continue
    return True


def _touch(path):
    try:
        os.utime(path, None)
    except FileNotFoundError:
        try:
            # Create the file if it does not exist yet.
            with open(path, 'a'):
                pass
        except OSError:  # TODO Maybe log this?
            return False
    return True


def _remove_cache_and_update_lock(cache_path=None):
    lock_path = _get_cache_clear_lock_path(cache_path=cache_path)
    try:
        clear_lock_time = os.path.getmtime(lock_path)
    except FileNotFoundError:
        clear_lock_time = None
    if (
        clear_lock_time is None  # first time
        or clear_lock_time + _CACHE_CLEAR_THRESHOLD <= time.time()
    ):
        if not _touch(lock_path):
            # First make sure that as few as possible other cleanup jobs also
            # get started. There is still a race condition, but it's probably
            # not a big problem.
            return False

        clear_inactive_cache(cache_path=cache_path)


def _get_hashed_path(hashed_grammar, path, cache_path=None):
    directory = _get_cache_directory_path(cache_path=cache_path)

    file_hash = hashlib.sha256(str(path).encode("utf-8")).hexdigest()
    return os.path.join(directory, '%s-%s.pkl' % (hashed_grammar, file_hash))
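# Illustrative file name (hypothetical values): a grammar hash of 'abc123' and
# a source path of '/tmp/example.py' yield something like
# '<cache_path>/<_VERSION_TAG>/abc123-<sha256 hex digest of the path>.pkl'.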


def _get_cache_directory_path(cache_path=None):
    if cache_path is None:
        cache_path = _default_cache_path
    directory = cache_path.joinpath(_VERSION_TAG)
    # exist_ok avoids a race when several processes create the directory
    # at the same time.
    os.makedirs(directory, exist_ok=True)
    return directory
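
# A minimal usage sketch (assumptions: `hashed_grammar` is a grammar hash as
# produced by parso, and `file_io` is an object exposing `.path` and
# `.get_last_modified()`; `parse_somehow` is a hypothetical parse step):
#
#     module = load_module(hashed_grammar, file_io)
#     if module is None:
#         module, lines = parse_somehow(file_io)
#         try_to_save_module(hashed_grammar, file_io, module, lines)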