|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
# Natural Language Toolkit: Compatibility
|
|
|
|
#
|
|
|
|
# Copyright (C) 2001-2020 NLTK Project
|
|
|
|
#
|
|
|
|
# URL: <http://nltk.org/>
|
|
|
|
# For license information, see LICENSE.TXT
|
|
|
|
|
|
|
|
import os
|
|
|
|
from functools import wraps
|
|
|
|
|
|
|
|
# ======= Compatibility for datasets that care about Python versions ========
|
|
|
|
|
|
|
|
# The following datasets have a /PY3 subdirectory containing
|
|
|
|
# a full copy of the data which has been re-encoded or repickled.
|
|
|
|
# Each entry is a (category directory, dataset name) pair.
DATA_UPDATES = [
    ("chunkers", "maxent_ne_chunker"),
    ("help", "tagsets"),
    ("taggers", "maxent_treebank_pos_tagger"),
    ("tokenizers", "punkt"),
]

# The same entries joined into relative paths with the platform separator,
# used for substring matching against data paths.
_PY3_DATA_UPDATES = [
    os.path.join(category, dataset) for category, dataset in DATA_UPDATES
]
|
|
|
|
|
|
|
|
def add_py3_data(path):
    """Return *path* with ``/PY3`` inserted after a known dataset name.

    The first entry of ``_PY3_DATA_UPDATES`` that occurs in ``str(path)``
    fixes the insertion point: immediately after the dataset name, or after
    a ``.zip`` suffix when one directly follows the name.  *path* is
    returned unchanged when it already contains ``/PY3`` or when no entry
    matches.
    """
    text = str(path)
    if "/PY3" in text:
        # Already redirected; nothing to do.
        return path

    dataset = next((name for name in _PY3_DATA_UPDATES if name in text), None)
    if dataset is None:
        return path

    # Matching is done on str(path), but the splice indexes path itself.
    cut = path.index(dataset) + len(dataset)
    if path[cut : cut + 4] == ".zip":
        # Keep the archive suffix attached to the dataset name.
        cut += 4
    return path[:cut] + "/PY3" + path[cut:]
|
|
|
|
|
|
|
|
|
|
|
|
# for use in adding /PY3 to the second (filename) argument
|
|
|
|
# of the file pointers in data.py
|
|
|
|
def py3_data(init_func):
    """Decorate *init_func* so its second positional argument is rewritten.

    The returned wrapper passes the second positional argument through
    ``add_py3_data`` and forwards every other positional and keyword
    argument untouched.  At least two positional arguments are required;
    fewer raise ``IndexError``.
    """

    @wraps(init_func)
    def _with_py3_path(*args, **kwargs):
        # Index explicitly so a short argument list still fails with IndexError.
        receiver = args[0]
        filename = add_py3_data(args[1])
        return init_func(receiver, filename, *args[2:], **kwargs)

    return _with_py3_path
|